BibTeX bibliography tecs.bib

%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.92",
%%%     date            = "10 April 2024",
%%%     time            = "08:53:39 MST",
%%%     filename        = "tecs.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "https://www.math.utah.edu/~beebe",
%%%     checksum        = "26308 68289 362348 3437974",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "bibliography; BibTeX; ACM Transactions on
%%%                        Embedded Computing Systems (TECS)",
%%%     license         = "public domain",
%%%     supported       = "no",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        the journal ACM Transactions on Embedded
%%%                        Computing Systems (no CODEN, ISSN 1539-9087
%%%                        (print), 1558-3465 (electronic)), for
%%%                        2002--date.
%%%
%%%                        Publication began with volume 1, number 1,
%%%                        in November 2002.  The journal appears
%%%                        quarterly, in February, May, August, and
%%%                        November.
%%%
%%%                        The journal has World-Wide Web sites at:
%%%
%%%                            http://www.acm.org/pubs/tecs
%%%                            https://dl.acm.org/loi/tecs
%%%
%%%                        Tables-of-contents of all issues are
%%%                        available at:
%%%
%%%                            http://www.acm.org/pubs/contents/journals/tecs/
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        At version 1.92, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2002 (   7)    2010 (  58)    2018 (  90)
%%%                             2003 (  24)    2011 (  19)    2019 ( 122)
%%%                             2004 (  36)    2012 (  89)    2020 (  64)
%%%                             2005 (  39)    2013 ( 152)    2021 (  59)
%%%                             2006 (  30)    2014 (  97)    2022 (  86)
%%%                             2007 (  39)    2015 (  87)    2023 ( 165)
%%%                             2008 (  47)    2016 ( 109)    2024 (  35)
%%%                             2009 (  39)    2017 ( 163)
%%%
%%%                             Article:       1656
%%%
%%%                             Total entries: 1656
%%%
%%%                        Spelling has been verified with the UNIX
%%%                        spell and GNU ispell programs using the
%%%                        exception dictionary stored in the
%%%                        companion file with extension .sok.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume.''
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================

%%% Preamble text copied into the generated bibliography: it inputs the
%%% file bibnames.sty, and then defines the \pkg macro (argument set in
%%% \tt) only when \pkg is not already defined.
@Preamble{"\input bibnames.sty" #
    "\ifx \undefined \pkg       \def \pkg      #1{{{\tt #1}}} \fi"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:

%%% ack-nhfb expands to the name, postal address, telephone and FAX
%%% numbers, e-mail addresses, and home-page URL of Nelson H. F. Beebe.
%%% Entries in this file reference it from their acknowledgement field.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:

@String{j-TECS = {ACM Transactions on Embedded Computing Systems}}

%%% ====================================================================
%%% Bibliography entries:

@Article{Wolf:2002:III,
  author          = {Wayne Wolf},
  title           = {Introduction to the inaugural issue},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {1--1},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Jacob:2002:ITS,
  author          = {Bruce Jacob and Shuvra Bhattacharyya},
  title           = {Introduction to the two special issues on memory},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {2--5},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Avissar:2002:OMA,
  author          = {Oren Avissar and Rajeev Barua and Dave Stewart},
  title           = {An optimal memory allocation scheme for
                     scratch-pad-based embedded systems},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {6--26},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Chen:2002:TGC,
  author          = {G. Chen and R. Shetty and M. Kandemir and
                     N. Vijaykrishnan and M. J. Irwin and M. Wolczko},
  title           = {Tuning garbage collection for reducing memory
                     system energy in an embedded {Java} environment},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {27--55},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Lee:2002:AAI,
  author          = {Jung-Hoon Lee and Shin-Dug Kim and Charles Weems},
  title           = {Application-adaptive intelligent cache memory system},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {56--78},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Yang:2002:FVL,
  author          = {Jun Yang and Rajiv Gupta},
  title           = {Frequent value locality and its applications},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {79--105},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Ykman-Couvreur:2002:SLE,
  author          = {Ch. Ykman-Couvreur and J. Lambrecht and
                     A. {Van Der Togt} and F. Catthoor and H. {De Man}},
  title           = {System-level exploration of association table
                     implementations in telecom network applications},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {106--140},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Jacob:2003:ITS,
  author          = {Bruce Jacob and Shuvra Bhattacharyya},
  title           = {Introduction to the two special issues on memory},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {1--4},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Benini:2003:EAD,
  author          = {Luca Benini and Alberto Macii and Massimo Poncino},
  title           = {Energy-aware design of embedded memories: a survey
                     of technologies, architectures, and optimization
                     techniques},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {5--32},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Grun:2003:APB,
  author          = {Peter Grun and Nikil Dutt and Alex Nicolau},
  title           = {Access pattern-based memory and connectivity
                     architecture exploration},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {33--73},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Qu:2003:SSS,
  author          = {Gang Qu and Miodrag Potkonjak},
  title           = {System synthesis of synchronous multimedia
                     applications},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {74--97},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Shim:2003:LEC,
  author          = {Hojun Shim and Yongsoo Joo and Yongseok Choi and
                     Hyung Gyu Lee and Naehyuck Chang},
  title           = {Low-energy off-chip {SDRAM} memory systems for
                     embedded applications},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {98--130},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% NOTE(review): the first author may be recorded in family-name-first
%%% order (possibly Guang R. Gao) -- confirm against the published issue
%%% before changing it; the citation tag is derived from this name.
@Article{Guang:2003:SIC,
  author          = {Gao Guang and Trevor Mudge},
  title           = {Special issue on compilers, architecture, and
                     synthesis for embedded systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {2},
  pages           = {131--131},
  month           = may,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Franke:2003:ARH,
  author          = {Bj{\"o}rn Franke and Michael O'Boyle},
  title           = {Array recovery and high-level transformations for
                     {DSP} applications},
  journal         = j-TECS,
  volume          = {2},
  number          = {2},
  pages           = {132--162},
  month           = may,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Kim:2003:PIC,
  author          = {Soontae Kim and N. Vijaykrishnan and
                     Mahmut Kandemir and Anand Sivasubramaniam and
                     Mary Jane Irwin},
  title           = {Partitioned instruction cache architecture for
                     energy efficiency},
  journal         = j-TECS,
  volume          = {2},
  number          = {2},
  pages           = {163--185},
  month           = may,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Rabbah:2003:DRD,
  author          = {Rodric M. Rabbah and Krishna V. Palem},
  title           = {Data remapping for design space optimization of
                     embedded memory systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {2},
  pages           = {186--218},
  month           = may,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Zhao:2003:SRM,
  author          = {Qin Zhao and Bart Mesman and Twan Basten},
  title           = {Static resource models for code-size efficient
                     embedded processors},
  journal         = j-TECS,
  volume          = {2},
  number          = {2},
  pages           = {219--250},
  month           = may,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Jacome:2003:SIP,
  author          = {Margarida Jacome and Francky Catthoor},
  title           = {Special issue on power-aware embedded computing},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {251--254},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Martin:2003:CSS,
  author          = {Thomas L. Martin and Daniel P. Siewiorek and
                     Asim Smailagic and Matthew Bosworth and
                     Matthew Ettus and Jolin Warren},
  title           = {A case study of a system-level approach to
                     power-aware computing},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {255--276},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Rakhmatov:2003:EMB,
  author          = {Daler Rakhmatov and Sarma Vrudhula},
  title           = {Energy management for battery-powered embedded
                     systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {277--324},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Irani:2003:OSD,
  author          = {Sandy Irani and Sandeep Shukla and Rajesh Gupta},
  title           = {Online strategies for dynamic power management in
                     systems with multiple power-saving states},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {325--346},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Zhou:2003:AMC,
  author          = {Huiyang Zhou and Mark C. Toburen and
                     Eric Rotenberg and Thomas M. Conte},
  title           = {Adaptive mode control: a static-power-efficient
                     cache design},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {347--372},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Unsal:2003:CCC,
  author          = {Osman S. Unsal and Raksit Ashok and Israel Koren
                     and C. Mani Krishna and Csaba Andras Moritz},
  title           = {{Cool-Cache}: a compiler-enabled energy efficient
                     data caching framework for embedded/multimedia
                     processors},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {373--392},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Yun:2003:EOV,
  author          = {Han-Saem Yun and Jihong Kim},
  title           = {On energy-optimal voltage scheduling for
                     fixed-priority hard real-time systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {393--430},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Schurgers:2003:PME,
  author          = {Curt Schurgers and Vijay Raghunathan and
                     Mani B. Srivastava},
  title           = {Power management for energy-aware communication
                     systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {431--447},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Gordon-Ross:2003:TIC,
  author          = {Ann Gordon-Ross and Susan Cotterell and Frank Vahid},
  title           = {Tiny instruction caches for low power embedded
                     systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {449--481},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Lin:2003:CMC,
  author          = {Kelvin Lin and Chung-Ping Chung and
                     Jean Jyh-Jiun Shann},
  title           = {Compressing {MIPS} code by multiple operand
                     dependencies},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {482--508},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Musoll:2003:SRU,
  author          = {Enric Musoll},
  title           = {Speculating to reduce unnecessary power consumption},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {509--536},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Rusu:2003:MRR,
  author          = {Cosmin Rusu and Rami Melhem and Daniel Moss{\'e}},
  title           = {Maximizing rewards for real-time applications with
                     energy constraints},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {537--559},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Venkataramani:2003:ACC,
  author          = {Girish Venkataramani and Walid Najjar and
                     Fadi Kurdahi and Nader Bagherzadeh and Wim Bohm
                     and Jeff Hammes},
  title           = {Automatic compilation to a coarse-grained
                     reconfigurable system-on-a-chip},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {560--589},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Zhuge:2003:CSR,
  author          = {Qingfeng Zhuge and Bin Xiao and Edwin H.-M. Sha},
  title           = {Code size reduction technique and implementation
                     for software-pipelined {DSP} applications},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {590--613},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Gupta:2004:GES,
  author          = {Rajesh Gupta},
  title           = {Guest editorial: {Special} issue on networked
                     embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {1--2},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Raghunathan:2004:EEW,
  author          = {Vijay Raghunathan and Saurabh Ganeriwal and
                     Mani Srivastava and Curt Schurgers},
  title           = {Energy efficient wireless packet scheduling and
                     fair queuing},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {3--23},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Bulusu:2004:SCL,
  author =       "Nirupama Bulusu and John Heidemann and Deborah Estrin
                 and Tommy Tran",
  title =        "Self-configuring localization systems: {Design} and
                 experimental evaluation",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "24--60",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zou:2004:SDT,
  author          = {Yi Zou and Krishnendu Chakrabarty},
  title           = {Sensor deployment and target localization
                     in distributed sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {61--91},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Gebotys:2004:DSC,
  author          = {Catherine H. Gebotys},
  title           = {Design of secure cryptography against the threat
                     of power-attacks in {DSP}-embedded processors},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {92--113},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Mishra:2004:MVP,
  author          = {Prabhat Mishra and Nikil Dutt},
  title           = {Modeling and validation of pipeline specifications},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {114--139},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Mishra:2004:PMC,
  author          = {Prabhat Mishra and Mahesh Mamidipaka and Nikil Dutt},
  title           = {Processor-memory coexploration using an
                     architecture description language},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {140--162},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Naik:2004:CCS,
  author          = {Mayur Naik and Jens Palsberg},
  title           = {Compiling with code-size constraints},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {163--181},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Schmitz:2004:ISO,
  author          = {Marcus T. Schmitz and Bashir M. Al-Hashimi and
                     Petru Eles},
  title           = {Iterative schedule optimization for voltage
                     scalable distributed embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {182--217},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Stitt:2004:ESS,
  author          = {Greg Stitt and Frank Vahid and Shawn Nematbakhsh},
  title           = {Energy savings and speedups from partitioning
                     critical software loops to hardware in embedded
                     systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {218--232},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Lach:2004:ESI,
  author          = {John Lach and Kia Bazargan},
  title           = {Editorial: {Special} issue on dynamically
                     adaptable embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {233--236},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Ghiasi:2004:OAM,
  author          = {Soheil Ghiasi and Ani Nahapetian and
                     Majid Sarrafzadeh},
  title           = {An optimal algorithm for minimizing
                     run-time reconfiguration delay},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {237--256},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Robertson:2004:DFP,
  author          = {Ian Robertson and James Irvine},
  title           = {A design flow for partially reconfigurable hardware},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {257--283},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Mejia-Alvarez:2004:ASS,
  author          = {Pedro Mejia-Alvarez and Eugene Levner and
                     Daniel Moss{\'e}},
  title           = {Adaptive scheduling server for power-aware
                     real-time tasks},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {284--306},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Zhang:2004:BAP,
  author          = {Fan Zhang and Samuel T. Chanson},
  title           = {Blocking-aware processor voltage scheduling
                     for real-time tasks},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {307--335},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Zhang:2004:DAF,
  author          = {Ying Zhang and Krishnendu Chakrabarty},
  title           = {Dynamic adaptation for fault tolerance and
                     power management in embedded real-time systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {336--360},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Huang:2004:DDR,
  author          = {Zhining Huang and Sharad Malik and
                     Nahri Moreano and Guido Araujo},
  title           = {The design of dynamically reconfigurable
                     datapath coprocessors},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {361--384},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Noguera:2004:MRA,
  author          = {Juanjo Noguera and Rosa M. Badia},
  title           = {Multitasking on reconfigurable architectures:
                     microarchitecture support and dynamic scheduling},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {385--406},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Zhang:2004:STC,
  author          = {Chuanjun Zhang and Frank Vahid and Roman Lysecky},
  title           = {A self-tuning cache architecture for
                     embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {407--425},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{He:2004:AAA,
  author          = {Tian He and Brian M. Blum and John A. Stankovic
                     and Tarek Abdelzaher},
  title           = {{AIDA}: {Adaptive} application-independent
                     data aggregation in wireless sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {426--457},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Serpanos:2004:GES,
  author          = {Dimitrios N. Serpanos and Haris Lekatsas},
  title           = {Guest editorial: {Special} issue on embedded
                     systems and security},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {459--460},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Ravi:2004:SES,
  author          = {Srivaths Ravi and Anand Raghunathan and
                     Paul Kocher and Sunil Hattangady},
  title           = {Security in embedded systems: {Design} challenges},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {461--491},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Coron:2004:SSL,
  author =       "Jean-S{\'e}bastien Coron and David Naccache and Paul
                 Kocher",
  title =        "Statistics and secret leakage",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "492--508",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wollinger:2004:EHC,
  author          = {Thomas Wollinger and Jan Pelzl and
                     Volker Wittelsberger and Christof Paar and
                     G{\"o}kay Saldamli and {\c{C}}etin K. Ko{\c{c}}},
  title           = {Elliptic and hyperelliptic curves on embedded
                     {$ \mu $P}},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {509--533},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Wollinger:2004:SFS,
  author          = {Thomas Wollinger and Jorge Guajardo and
                     Christof Paar},
  title           = {Security on {FPGAs}: {State-of-the-art}
                     implementations and attacks},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {534--574},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Reyhani-Masoleh:2004:EDS,
  author          = {Arash Reyhani-Masoleh and M. Anwar Hasan},
  title           = {Efficient digit-serial normal basis multipliers
                     over binary extension fields},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {575--592},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Reyhani-Masoleh:2004:TFT,
  author          = {Arash Reyhani-Masoleh and M. Anwar Hasan},
  title           = {Towards fault-tolerant cryptographic computations
                     over finite fields},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {593--613},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Liu:2004:FSM,
  author          = {Rong-Tai Liu and Nen-Fu Huang and
                     Chih-Hao Chen and Chia-Nan Kao},
  title           = {A fast string-matching algorithm for network
                     processor-based intrusion detection system},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {614--633},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Park:2004:LLS,
  author          = {Taejoon Park and Kang G. Shin},
  title           = {{LiSP}: a lightweight security protocol for
                     wireless sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {634--660},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Harkin:2004:MOR,
  author          = {J. Harkin and T. M. McGinnity and L. P. Maguire},
  title           = {Modeling and optimizing run-time reconfiguration
                     using evolutionary computation},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {661--685},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Im:2004:DVS,
  author          = {Chaeseok Im and Soonhoi Ha and Huiseok Kim},
  title           = {Dynamic voltage scheduling with buffers in
                     low-power multimedia applications},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {686--705},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Manolache:2004:SAA,
  author          = {Sorin Manolache and Petru Eles and Zebo Peng},
  title           = {Schedulability analysis of applications
                     with stochastic task execution times},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {706--735},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Serpanos:2004:EHS,
  author          = {Dimitrios N. Serpanos and Poluxeni Mountrouidou
                     and Maria Gamvrili},
  title           = {Evaluation of hardware and software schedulers
                     for embedded switches},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {736--759},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Lanotte:2004:IFH,
  author          = {Ruggero Lanotte and Andrea Maggiolo-Schettini
                     and Simone Tini},
  title           = {Information flow in hybrid systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {760--799},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Liu:2004:MBA,
  author          = {Donggang Liu and Peng Ning},
  title           = {Multilevel {$ \mu $TESLA}: {Broadcast}
                     authentication for distributed sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {800--836},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Chang:2004:RTG,
  author          = {Li-Pin Chang and Tei-Wei Kuo and Shi-Wu Lo},
  title           = {Real-time garbage collection for flash-memory
                     storage systems of real-time embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {837--863},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Mueller:2005:ISI,
  author          = {Frank Mueller and Per Stenstr{\"o}m},
  title           = {Introduction to the special issue},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {1--2},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Krishnaswamy:2005:DCB,
  author          = {Arvind Krishnaswamy and Rajiv Gupta},
  title           = {Dynamic coalescing for 16-bit instructions},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {3--37},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Corliss:2005:IED,
  author          = {Marc L. Corliss and E. Christopher Lewis and
                     Amir Roth},
  title           = {The implementation and evaluation of dynamic
                     code decompression using {DISE}},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {38--72},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Dhurjati:2005:MSG,
  author          = {Dinakar Dhurjati and Sumant Kowshik and
                     Vikram Adve and Chris Lattner},
  title           = {Memory safety without garbage collection for
                     embedded applications},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {73--111},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Pop:2005:SDF,
  author =       "Paul Pop and Petru Eles and Zebo Peng",
  title =        "Schedulability-driven frame packing for multicluster
                 distributed embedded systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "112--140",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Swaminathan:2005:PBE,
  author =       "Vishnu Swaminathan and Krishnendu Chakrabarty",
  title =        "Pruning-based, energy-optimal, deterministic {I/O}
                 device scheduling for hard real-time systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "141--167",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chiou:2005:SAS,
  author =       "Lih-yih Chiou and Swarup Bhunia and Kaushik Roy",
  title =        "Synthesis of application-specific highly efficient
                 multi-mode cores for embedded systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "168--188",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zambreno:2005:SOA,
  author =       "Joseph Zambreno and Alok Choudhary and Rahul Simha and
                 Bhagi Narahari and Nasir Memon",
  title =        "{SAFE-OPS}: an approach to embedded software
                 security",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "189--210",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kwon:2005:OVA,
  author =       "Woo-Cheol Kwon and Taewhan Kim",
  title =        "Optimal voltage allocation techniques for dynamically
                 variable voltage processors",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "211--230",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tan:2005:EME,
  author =       "T. K. Tan and A. Raghunathan and N. K. Jha",
  title =        "Energy macromodeling of embedded operating systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "1",
  pages =        "231--254",
  month =        feb,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:48:07 MST 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2005:GES,
  author =       "Sandeep K. Shukla and Jean-Pierre Talpin",
  title =        "Guest editorial: {Special} issue on models and
                 methodologies for co-design of embedded systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "225--227",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cachera:2005:VSP,
  author =       "David Cachera and Katell Morin-Allory",
  title =        "Verification of safety properties for parameterized
                 regular systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "228--266",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chouali:2005:PPM,
  author =       "S. Chouali and J. Julliand and P.-A. Masson and F.
                 Bellegarde",
  title =        "{PLTL}-partitioned model checking for reactive systems
                 under fairness assumptions",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "267--301",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gardner:2005:CCS,
  author =       "William B. Gardner",
  title =        "Converging {CSP} specifications and {C++} programming
                 via selective formalism",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "302--330",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ziller:2005:CSS,
  author =       "Roberto Ziller and Klaus Schneider",
  title =        "Combining supervisor synthesis and model checking",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "331--362",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2005:HCC,
  author =       "Chuanjun Zhang and Frank Vahid and Walid Najjar",
  title =        "A highly configurable cache for low energy embedded
                 systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "363--387",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kadayif:2005:DSO,
  author =       "I. Kadayif and M. Kandemir",
  title =        "Data space-oriented tiling for enhancing locality",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "388--414",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Muresan:2005:ICM,
  author =       "Radu Muresan and Catherine Gebotys",
  title =        "Instantaneous current modeling in a complex {VLIW}
                 processor core",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "415--451",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Petrov:2005:RCF,
  author =       "Peter Petrov and Alex Orailoglu",
  title =        "A reprogrammable customization framework for efficient
                 branch resolution in embedded processors",
  journal =      j-TECS,
  volume =       "4",
  number =       "2",
  pages =        "452--468",
  month =        may,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jun 21 16:50:36 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Burns:2005:E,
  author =       "Alan Burns",
  title =        "Editorial",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "469--471",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sangiovanni-Vincentelli:2005:OES,
  author =       "Alberto L. Sangiovanni-Vincentelli and Alessandro
                 Pinto",
  title =        "An overview of embedded system design education at
                 {Berkeley}",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "472--499",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Koopman:2005:UES,
  author =       "Philip Koopman and Howie Choset and Rajeev Gandhi and
                 Bruce Krogh and Diana Marculescu and Priya Narasimhan
                 and Joann M. Paul and Ragunathan Rajkumar and Daniel
                 Siewiorek and Asim Smailagic and Peter Steenkiste and
                 Donald E. Thomas and Chenxi Wang",
  title =        "Undergraduate embedded system education at {Carnegie
                 Mellon}",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "500--528",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Verbauwhede:2005:SES,
  author =       "Ingrid Verbauwhede and Patrick Schaumont",
  title =        "Skiing the embedded systems mountain",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "529--548",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sztipanovits:2005:IES,
  author =       "Janos Sztipanovits and Gautam Biswas and Ken Frampton
                 and Aniruddha Gokhale and Larry Howard and Gabor Karsai
                 and T. John Koo and Xenofon Koutsoukos and Douglas C.
                 Schmidt",
  title =        "Introducing embedded software and systems education
                 and advanced learning technology in an engineering
                 curriculum",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "549--568",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seviora:2005:CES,
  author =       "Rudolph E. Seviora",
  title =        "A curriculum for embedded system engineering",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "569--586",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Caspi:2005:GGC,
  author =       "P. Caspi and A. Sangiovanni-Vincentelli and L. Almeida
                 and A. Benveniste and B. Bouyssounouse and G. Buttazzo
                 and I. Crnkovic and W. Damm and J. Engblom and G.
                 Fohler and M. Garcia-Valls and H. Kopetz and Y.
                 Lakhnech and F. Laroussinie and L. Lavagno and G.
                 Lipari and F. Maraninchi and Ph. Peti and J. de la
                 Puente and N. Scaife and J. Sifakis and R. de Simone
                 and M. T{\"o}rngren and P. Ver{\'\i}ssimo and A. J.
                 Wellings and R. Wilhelm and T. Willemse and W. Yi",
  title =        "Guidelines for a graduate curriculum on embedded
                 software and systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "587--611",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2005:ESC,
  author =       "Tai-Yi Huang and Chung-Ta King and Youn-Long Steve Lin
                 and Yin-Tsung Hwang",
  title =        "The embedded software consortium of {Taiwan}",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "612--632",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Grimheden:2005:WES,
  author =       "Martin Grimheden and Martin T{\"o}rngren",
  title =        "What is embedded systems and how should it be
                 taught?---results from a didactic analysis",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "633--651",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2005:RDC,
  author =       "Wei Zhang and Mahmut Kandemir and Mustafa Karakoy and
                 Guangyu Chen",
  title =        "Reducing data cache leakage energy using a
                 compiler-based approach",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "652--678",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2005:DDC,
  author =       "Hyung Seok Kim and Tarek F. Abdelzaher and Wook Hyun
                 Kwon",
  title =        "Dynamic delay-constrained minimum-energy dissemination
                 in wireless sensor networks",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "679--706",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Alur:2005:P,
  author =       "Rajeev Alur and Insup Lee",
  title =        "Preface",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "707--707",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tardieu:2005:LE,
  author =       "Olivier Tardieu and Robert de Simone",
  title =        "Loops in {ESTEREL}",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "708--750",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1113830.1113832",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Regehr:2005:ESO,
  author =       "John Regehr and Alastair Reid and Kirk Webb",
  title =        "Eliminating stack overflow by abstract
                 interpretation",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "751--778",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tripakis:2005:TDT,
  author =       "Stavros Tripakis and Christos Sofronis and Paul Caspi
                 and Adrian Curic",
  title =        "Translating discrete-time {Simulink} to {Lustre}",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "779--818",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1113830.1113834",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kadayif:2005:CDH,
  author =       "I. Kadayif and M. Kandemir and G. Chen and N.
                 Vijaykrishnan and M. J. Irwin and A. Sivasubramaniam",
  title =        "Compiler-directed high-level energy estimation and
                 optimization",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "819--850",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hu:2005:ADR,
  author =       "J. Hu and M. Kandemir and N. Vijaykrishnan and M. J.
                 Irwin",
  title =        "Analyzing data reuse for cache reconfiguration",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "851--876",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{He:2005:RFL,
  author =       "Tian He and Chengdu Huang and Brian M. Blum and John
                 A. Stankovic and Tarek F. Abdelzaher",
  title =        "Range-free localization and its impact on large scale
                 sensor networks",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "877--906",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gaujal:2005:SPA,
  author =       "Bruno Gaujal and Nicolas Navet and Cormac Walsh",
  title =        "Shortest-path algorithms for real-time scheduling of
                 {FIFO} tasks with minimal energy use",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "907--933",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bartolini:2005:OIC,
  author =       "S. Bartolini and C. A. Prete",
  title =        "Optimizing instruction cache performance of embedded
                 systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "934--965",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2006:RDL,
  author =       "W. Zhang and Y.-F. Tsai and D. Duarte and N.
                 Vijaykrishnan and M. Kandemir and M. J. Irwin",
  title =        "Reducing dynamic and leakage energy in {VLIW}
                 architectures",
  journal =      j-TECS,
  volume =       "5",
  number =       "1",
  pages =        "1--28",
  month =        feb,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu May 18 08:17:05 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Coussy:2006:FMH,
  author =       "Philippe Coussy and Emmanuel Casseau and Pierre Bomel
                 and Adel Baganne and Eric Martin",
  title =        "A formal method for hardware {IP} design and
                 integration under {I/O} and timing constraints",
  journal =      j-TECS,
  volume =       "5",
  number =       "1",
  pages =        "29--53",
  month =        feb,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu May 18 08:17:05 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Varea:2006:DFN,
  author =       "Mauricio Varea and Bashir M. Al-Hashimi and Luis A.
                 Cort{\'e}s and Petru Eles and Zebo Peng",
  title =        "{Dual Flow Nets}: {Modeling} the control\slash
                 data-flow relation in embedded systems",
  journal =      j-TECS,
  volume =       "5",
  number =       "1",
  pages =        "54--81",
  month =        feb,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu May 18 08:17:05 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{AbouGhazaleh:2006:COS,
  author          = {Nevine AbouGhazaleh and Daniel Moss{\'e} and Bruce R.
                     Childers and Rami Melhem},
  title           = {Collaborative operating system and compiler power
                     management for real-time applications},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {82--115},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Dean:2006:STI,
  author          = {Alexander G. Dean},
  title           = {Software thread integration for embedded system
                     display applications},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {116--151},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Alur:2006:PAR,
  author          = {Rajeev Alur and Thao Dang and Franjo
                     Ivan{\v{c}}i{\'c}},
  title           = {Predicate abstraction for reachability analysis of
                     hybrid systems},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {152--199},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Seth:2006:FFA,
  author          = {Kiran Seth and Aravindh Anantaraman and Frank Mueller
                     and Eric Rotenberg},
  title           = {{FAST}: {Frequency-Aware Static Timing} analysis},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {200--224},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Chen:2006:RCS,
  author          = {G. Chen and M. Kandemir and M. J. Irwin and J.
                     Ramanujam},
  title           = {Reducing code size through address register
                     assignment},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {225--258},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Jerraya:2006:GEC,
  author          = {Ahmed Jerraya and Trevor Mudge},
  title           = {Guest editorial: {Concurrent} hardware and software
                     design for multiprocessor {SoC}},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {259--262},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Xu:2006:DMA,
  author          = {Jiang Xu and Wayne Wolf and Joerg Henkel and Srimat
                     Chakradhar},
  title           = {A design methodology for application-specific
                     networks-on-chip},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {263--280},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Kangas:2006:UBM,
  author          = {Tero Kangas and Petri Kukkala and Heikki Orsila and
                     Erno Salminen and Marko H{\"a}nnik{\"a}inen and Timo D.
                     H{\"a}m{\"a}l{\"a}inen and Jouni Riihim{\"a}ki and
                     Kimmo Kuusilinna},
  title           = {{UML}-based multiprocessor {SoC} design framework},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {281--320},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Hua:2006:EEE,
  author          = {Shaoxiong Hua and Gang Qu and Shuvra S.
                     Bhattacharyya},
  title           = {Energy-efficient embedded software implementation on
                     multiprocessor system-on-chip with multiple voltages},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {321--341},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Hessel:2006:SRA,
  author          = {Fabiano Hessel and Vitor M. {Da Rosa} and Carlos
                     Eduardo Reif and C{\'e}sar Marcon and Tatiana {Gadelha
                     Serra Dos Santos}},
  title           = {Scheduling refinement in abstract {RTOS} models},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {342--354},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Ou:2006:DSE,
  author          = {Jingzhao Ou and Viktor K. Prasanna},
  title           = {Design space exploration using arithmetic-level
                     hardware--software cosimulation for configurable
                     multiprocessor platforms},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {355--382},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Loghi:2006:CCT,
  author          = {Mirko Loghi and Massimo Poncino and Luca Benini},
  title           = {Cache coherence tradeoffs in shared-memory {MPSoCs}},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {383--407},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Lapalme:2006:NEE,
  author          = {James Lapalme and El Mostapha Aboulhamid and Gabriela
                     Nicolescu},
  title           = {A new efficient {EDA} tool design methodology},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {408--430},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Reshadi:2006:RFI,
  author          = {Mehrdad Reshadi and Nikil Dutt and Prabhat Mishra},
  title           = {A retargetable framework for instruction-set
                     architecture simulation},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {431--452},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Memik:2006:ENP,
  author          = {Gokhan Memik and William H. Mangione-Smith},
  title           = {Evaluating {Network Processors} using {NetBench}},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {453--471},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Udayakumaran:2006:DAS,
  author          = {Sumesh Udayakumaran and Angel Dominguez and Rajeev
                     Barua},
  title           = {Dynamic allocation for scratch-pad memory using
                     compile-time decisions},
  journal         = j-TECS,
  volume          = {5},
  number          = {2},
  pages           = {472--511},
  month           = may,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Aug 23 05:26:43 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Wu:2006:EEU,
  author          = {Haisang Wu and Binoy Ravindran and E. Douglas Jensen
                     and Peng Li},
  title           = {Energy-efficient, utility accrual scheduling under
                     resource constraints for mobile embedded systems},
  journal         = j-TECS,
  volume          = {5},
  number          = {3},
  pages           = {513--542},
  month           = aug,
  year            = {2006},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1165780.1165781},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Oct 11 06:45:18 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We present an energy-efficient, utility accrual,
                     real-time scheduling algorithm called ReUA. ReUA
                     considers an application model where activities are
                     subject to time/utility function time constraints,
                     mutual exclusion constraints on shared non-CPU
                     resources, and statistical performance requirements on
                     individual activity timeliness behavior. The algorithm
                     targets mobile embedded systems where {\em
                     system-level\/} energy consumption is also a major
                     concern. For such a model, we consider the scheduling
                     objectives of (1) satisfying the statistical
                     performance requirements and (2) maximizing the
                     system-level energy efficiency, while respecting
                     resource constraints. Since the problem is NP-hard,
                     ReUA allocates CPU cycles using statistical properties
                     of application cycle demands, and heuristically
                     computes schedules with a polynomial time cost. We
                     analytically establish several timeliness and
                     nontimeliness properties of the algorithm. Further, our
                     simulation experiments illustrate ReUA's effectiveness
                     and superiority.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Luo:2006:EEI,
  author =       "Liqian Luo and Tarek F. Abdelzaher and Tian He and
                 John A. Stankovic",
  title =        "{EnviroSuite}: {An} environmentally immersive
                 programming framework for sensor networks",
  journal =      j-TECS,
  volume =       "5",
  number =       "3",
  pages =        "543--576",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1165780.1165782",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Oct 11 06:45:18 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Sensor networks open a new frontier for
                 embedded-distributed computing. Paradigms for sensor
                 network programming-in-the-large have been identified
                 as a significant challenge toward developing
                 large-scale applications. Classical programming
                 languages are too low-level. This paper presents the
                 design, implementation, and evaluation of EnviroSuite,
                 a programming framework that introduces a new paradigm,
                 called environmentally immersive programming, to
                 abstract distributed interactions with the environment.
                 Environmentally immersive programming refers to an
                 object-based programming model in which individual
                 objects represent physical elements in the external
                 environment. It allows the programmer to think directly
                 in terms of environmental abstractions. EnviroSuite
                 provides language primitives for environmentally
                 immersive programming that map transparently into a
                 support library of distributed algorithms for tracking
                 and environmental monitoring. We show how nesC code of
                 realistic applications is significantly simplified
                 using EnviroSuite and demonstrate the resulting system
                 performance on Mica2 and XSM platforms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gebotys:2006:SMC,
  author =       "Catherine H. Gebotys",
  title =        "A split-mask countermeasure for low-energy secure
                 embedded systems",
  journal =      j-TECS,
  volume =       "5",
  number =       "3",
  pages =        "577--612",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1165780.1165783",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Oct 11 06:45:18 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Future wireless embedded devices will be increasingly
                 powerful, supporting many more applications, including
                 one of the most crucial---security. Although many
                 embedded devices offer more resistance to bus-probing
                 attacks because of their compact size, susceptibility
                 to power or electromagnetic analysis attacks must be
                 analyzed. This paper presents a new split-mask
                 countermeasure to thwart low-order differential power
                 analysis (DPA) and differential EM analysis (DEMA). For
                 the first time, real-power and EM measurements are used
                 to analyze the difficulty of launching new third-order
                 DPA and DEMA attacks on a popular low-energy 32-bit
                 embedded ARM processor. Results show that the new
                 split-mask countermeasure provides increased security
                 without large overheads of energy dissipation, compared
                 to previous research. With the emergence of security
                 applications in PDAs, cell phones, and other embedded
                 devices, low-energy countermeasures for resistance to
                 low-order DPA/DEMA is crucial for supporting future
                 enabled wireless internet.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhuang:2006:PLS,
  author          = {Xiaotong Zhuang and Santosh Pande},
  title           = {Parallelizing load\slash stores on dual-bank memory
                     embedded processors},
  journal         = j-TECS,
  volume          = {5},
  number          = {3},
  pages           = {613--657},
  month           = aug,
  year            = {2006},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1165780.1165784},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Oct 11 06:45:18 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Many modern embedded processors such as DSPs support
                     partitioned memory banks (also called X--Y memory or
                     dual-bank memory) along with parallel load/store
                     instructions to achieve higher code density and
                     performance. In order to effectively utilize the
                     parallel load/store instructions, the compiler must
                     partition the memory-resident values and assign them to
                     X or Y bank. This paper gives a postregister allocation
                     solution to merge the generated load/store instructions
                     into their parallel counterparts. Simultaneously, our
                     framework performs allocation of values to X or Y
                     memory banks. We first remove as many load/stores and
                     register--register moves as possible through an
                     excellent iterated coalescing based register allocator
                     by Appel and George [1996]. We then attempt to
                     parallelize the generated load/stores using a multipass
                     approach. The basic phase of our approach attempts the
                     merger of load/stores without duplication and web
                     splitting. We model this problem as a graph-coloring
                     problem in which each value is colored as either X or
                     Y. We then construct a motion scheduling graph (MSG),
                     based on the range of motion for each load/store
                     instruction. MSG reflects potential instructions that
                     could be merged. We propose a notion of pseudofixed
                     boundaries so that the load/store movement is less
                     affected by register dependencies. We prove that the
                     coloring problem for MSG is NP-complete and solve it
                     with two different heuristic algorithms with different
                     complexity. We then propose a two-level iterative
                     process to attempt instruction duplication, variable
                     duplication, web splitting, and local conflict
                     elimination to effectively merge the remaining
                     load/stores. Finally, we clean up some multiple-aliased
                     load/stores. To improve the performance, we combine
                     profiling information with each stage coupled with some
                     modifications to the algorithm. We show that our
                     framework results in parallelization of a large number
                     of load/stores without much growth in data and code
                     segments. The average speedup for our optimization pass
                     reaches roughly 13\% if no profile information is
                     available and 17\% with profile information. The
                     average code and data segment growth is controlled
                     within 13\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Jones:2006:RPW,
  author =       "Alex K. Jones and Raymond Hoare and Dara Kusic and
                 Gayatri Mehta and Josh Fazekas and John Foster",
  title =        "Reducing power while increasing performance with
                 {SuperCISC}",
  journal =      j-TECS,
  volume =       "5",
  number =       "3",
  pages =        "658--686",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1165780.1165785",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Oct 11 06:45:18 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multiprocessor Systems on Chips (MPSoCs) have become a
                 popular architectural technique to increase
                 performance. However, MPSoCs may lead to undesirable
                 power consumption characteristics for computing systems
                 that have strict power budgets, such as PDAs, mobile
                 phones, and notebook computers. This paper presents the
                 super-complex instruction-set computing (SuperCISC)
                 Embedded Processor Architecture and, in particular,
                 investigates performance and power consumption of this
                 device compared to traditional processor
                 architecture-based execution. SuperCISC is a
                 heterogeneous, multicore processor architecture
                 designed to exceed performance of traditional embedded
                 processors while maintaining a reduced power budget
                 compared to low-power embedded processors. At the heart
                 of the SuperCISC processor is a multicore VLIW (Very
                 Large Instruction Word) containing several homogeneous
                 execution cores/functional units. In addition, complex
                 and heterogeneous combinational hardware function cores
                 are tightly integrated to the core VLIW engine
                 providing an opportunity for improved performance and
                 reduced energy consumption. Our SuperCISC processor
                 core has been synthesized for both a 90-nm Stratix II
                 Field Programmable Gate Array (FPGA) and a 160-nm
                 standard cell Application-Specific Integrated Circuit
                 (ASIC) fabrication process from OKI, each operating at
                 approximately 167 MHz for the VLIW core. We examine
                 several reasons for speedup and power improvement
                 through the SuperCISC architecture, including
                 predicated control flow, cycle compression, and a
                 reduction in arithmetic power consumption, which we
                 call power compression. Finally, testing our SuperCISC
                 processor with multimedia and signal-processing
                 benchmarks, we show how the SuperCISC processor can
                 provide performance improvements ranging from 7X to
                 160X with an average of 60X, while also providing
                 orders of magnitude of power improvements for the
                 computational kernels. The power improvements for our
                 benchmark kernels range from just over 40X to over
                 400X, with an average savings exceeding 130X. By
                 combining these power and performance improvements, our
                 total energy improvements all exceed 1000X. As these
                 savings are limited to the computational kernels of the
                 applications, which often consume approximately 90\% of
                 the execution time, we expect our savings to approach
                 the ideal application improvement of 10X.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Girault:2006:ARD,
  author          = {Alain Girault and Xavier Nicollin and Marc Pouzet},
  title           = {Automatic rate desynchronization of embedded reactive
                     programs},
  journal         = j-TECS,
  volume          = {5},
  number          = {3},
  pages           = {687--717},
  month           = aug,
  year            = {2006},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1165780.1165786},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Oct 11 06:45:18 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Many embedded reactive programs perform computations
                     at different rates, while still requiring the overall
                     application to satisfy very tight temporal constraints.
                     We propose a method to automatically distribute
                     programs such that the obtained parts can be run at
                     different rates, which we call rate desynchronization.
                     We consider general programs whose control structure is
                     a finite state automaton and with a DAG of actions in
                     each state. The motivation is to take into account
                     long-duration tasks inside the programs: these are
                     tasks whose execution time is long compared to the
                     other computations in the application, and whose
                     maximal execution rate is known and bounded. Merely
                     scheduling such a long duration task at a slow rate
                     would not work since the whole program would be slowed
                     down if compiled into sequential code. It would thus be
                     impossible to meet the temporal constraints, unless
                     such long duration tasks could be desynchronized from
                     the remaining computations. This is precisely what our
                     method achieves: it distributes the initial program
                     into several parts, so that the parts performing the
                     slow computations can be run at an appropriate rate,
                     therefore not impairing the global reaction time of the
                     program. We present in detail our method, all the
                     involved algorithms, and a small running example. We
                     also compare our method with the related work.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Biswas:2006:MOP,
  author =       "Surupa Biswas and Thomas Carley and Matthew Simpson
                 and Bhuvan Middha and Rajeev Barua",
  title =        "Memory overflow protection for embedded systems using
                 run-time checks, reuse, and compression",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "719--752",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196637",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems usually lack virtual memory and are
                 vulnerable to memory overflow since they lack a
                 mechanism to detect overflow or use swap space
                 thereafter. We present a method to detect memory
                 overflows using compiler-inserted software run-time
                 checks. Its overheads in run-time and energy are 1.35
                 and 1.12\%, respectively. Detection of overflow allows
                 system-specific remedial action. We also present
                 techniques to grow the stack or heap segment after they
                 overflow, into previously unutilized space, such as
                 dead variables, free holes in the heap, and space freed
                 by compressing live variables. These may avoid the
                 out-of-memory error if the space recovered is enough to
                 complete execution. The reuse methods are able to grow
                 the stack or heap beyond its overflow by an amount that
                 varies widely by application---the amount of recovered
                 space ranges from 0.7 to 93.5\% of the combined stack
                 and heap size.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "data compression; heap overflow; out-of-memory errors;
                 reliability; reuse; run-time checks; stack overflow",
}

@Article{Higuera-Toledano:2006:HSD,
  author =       "M. Teresa Higuera-Toledano",
  title =        "Hardware support for detecting illegal references in a
                 multiapplication real-time {Java} environment",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "753--772",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196638",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Our objective is to adapt the Java memory management
                 to an embedded system, e.g., a wireless PDA executing
                 concurrent multimedia applications within a single JVM.
                 This paper provides software- and hardware-based
                 solutions detecting both illegal references across the
                 application memory spaces and dangling pointers within
                 an application space. We give an approach to
                 divide/share the memory among the applications
                 executing concurrently in the system. We introduce and
                 define application-specific memory, building upon the
                 real-time specification for Java (RTSJ) from the
                 real-time Java expert group. The memory model used in
                 RTSJ imposes strict rules for assignment between memory
                 areas, preventing the creation of dangling pointers,
                 and thus maintaining the pointer safety of Java. Our
                 implementation solution to ensure the checking of these
                 rules before each assignment inserts write barriers
                 that use a stack-based algorithm. This solution
                 adversely affects both the performance and
                 predictability of the RTSJ applications, which can be
                 improved by using an existing hardware support.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "garbage collection; memory management; write
                 barriers",
}

@Article{Winter:2006:TPC,
  author =       "Victor L. Winter and Jason Beranek and Fares Fraij and
                 Steve Roach and Greg Wickstrom",
  title =        "A transformational perspective into the core of an
                 abstract class loader for the {SSP}",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "773--818",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196639",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The SSP is a hardware implementation of a subset of
                 the JVM for use in high-consequence embedded
                 applications. In this context, a majority of the
                 activities belonging to class loading, as it is defined
                 in the specification of the JVM, can be performed
                 statically. Static class loading has the net result of
                 dramatically simplifying the design of the SSP, as well
                 as increasing its performance. Because of the
                 high-consequence nature of its applications, strong
                 evidence must be provided that all aspects of the SSP
                 have been implemented correctly. This includes the
                 class loader.
                 This article explores the possibility of formally
                 verifying a class loader for the SSP implemented in the
                 strategic programming language TL. Specifically, an
                 implementation of the core activities of an abstract
                 class loader is presented and its verification in ACL2
                 is considered.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "HATS; higher-order rewriting; SSP; strategic
                 programming; TL",
}

@Article{Kulkarni:2006:VVI,
  author =       "Prasad Kulkarni and Wankang Zhao and Stephen Hines and
                 David Whalley and Xin Yuan and Robert van Engelen and
                 Kyle Gallivan and Jason Hiser and Jack Davidson and
                 Baosheng Cai and Mark Bailey and Hwashin Moon and
                 Kyunghwan Cho and Yunheung Paek",
  title =        "{VISTA}: {VPO} interactive system for tuning
                 applications",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "819--863",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196640",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Software designers face many challenges when
                 developing applications for embedded systems. One major
                 challenge is meeting the conflicting constraints of
                 speed, code size, and power consumption. Embedded
                 application developers often resort to hand-coded
                 assembly language to meet these constraints since
                 traditional optimizing compiler technology is usually
                 of little help in addressing this challenge. The
                 results are software systems that are not portable,
                 less robust, and more costly to develop and maintain.
                 Another limitation is that compilers traditionally
                 apply the optimizations to a program in a fixed order.
                 However, it has long been known that a single ordering
                 of optimization phases will not produce the best code
                 for every application. In fact, the smallest unit of
                 compilation in most compilers is typically a function
                 and the programmer has no control over the code
                 improvement process other than setting flags to enable
                 or disable certain optimization phases. This paper
                 describes a new code improvement paradigm implemented
                 in a system called VISTA that can help achieve the
                 cost/performance trade-offs that embedded applications
                 demand. The VISTA system opens the code improvement
                 process and gives the application programmer, when
                 necessary, the ability to finely control it. VISTA also
                 provides support for finding effective sequences of
                 optimization phases. This support includes the ability
                 to interactively get static and dynamic performance
                 information, which can be used by the developer to
                 steer the code improvement process. This performance
                 information is also internally used by VISTA for
                 automatically selecting the best optimization sequence
                 from several attempted. One such feature is the use of
                 a genetic algorithm to search for the most efficient
                 sequence based on specified fitness criteria. We
                 include a number of experimental results that evaluate
                 the effectiveness of using a genetic algorithm in VISTA
                 to find effective optimization phase sequences.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "genetic algorithms; interactive compilation; phase
                 ordering; user-directed code improvement",
}

@Article{Ottoni:2006:OAU,
  author =       "Desiree Ottoni and Guilherme Ottoni and Guido Araujo
                 and Rainer Leupers",
  title =        "Offset assignment using simultaneous variable
                 coalescing",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "864--883",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1196636.1196641",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The generation of efficient addressing code is a
                 central problem in compiling for processors with
                 restricted addressing modes, like digital signal
                 processors (DSPs). Offset assignment (OA) is the
                 problem of allocating scalar variables to memory, so as
                 to minimize the need of addressing instructions. This
                 problem is called simple offset assignment (SOA) when a
                 single address register is available, and general
                 offset assignment (GOA) when more address registers are
                 used. This paper shows how variables' liveness
                 information can be used to dramatically reduce the
                 addressing instructions required to access local
                 variables on the program stack. Two techniques that
                 make effective use of variable coalescing to solve SOA
                 and GOA are described, namely coalescing SOA (CSOA) and
                 coalescing GOA (CGOA). In addition, a thorough
                 comparison between these algorithms and others
                 described in the literature is presented. The
                 experimental results, when compiling MediaBench
                 benchmark programs with the LANCE compiler, reveal a
                 very significant improvement of the proposed techniques
                 over the other available solutions to the problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "address registers; autoincrement addressing modes;
                 DSPs; register allocation; stack offset assignment;
                 variable coalescing",
}

@Article{Whalley:2007:GE,
  author =       "David Whalley",
  title =        "Guest {Editorial}",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1216577",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kumar:2007:ESI,
  author =       "Nagendra J. Kumar and Vasanth Asokan and Siddhartha
                 Shivshankar and Alexander G. Dean",
  title =        "Efficient software implementation of embedded
                 communication protocol controllers using asynchronous
                 software thread integration with time- and
                 space-efficient procedure calls",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210270",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The overhead of context switching limits efficient
                 scheduling of multiple concurrent threads on a
                 uniprocessor when real-time requirements exist. A
                 software-implemented protocol controller may be
                 crippled by this problem. The available idle time may
                 be too short to recover through context switching, so
                 only the primary thread can execute during message
                 activity, slowing the secondary threads and potentially
                 missing deadlines. Asynchronous software thread
                 integration (ASTI) uses coroutine calls and
                 integration, letting threads make independent progress
                 efficiently, and reducing the needed context switches.
                 We demonstrate the methods with a software
                 implementation of an automotive communication protocol
                 (J1850) and several secondary threads.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "asynchronous software thread integration; fine-grain
                 concurrency; hardware to software migration; J1850;
                 software-implemented communication protocol
                 controllers",
}

@Article{Zhuang:2007:PEP,
  author =       "Xiaotong Zhuang and Santosh Pande",
  title =        "Power-efficient prefetching for embedded processors",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210271",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Because of stringent power constraints, aggressive
                 latency-hiding approaches, such as prefetching, are
                 absent in the state-of-the-art embedded processors.
                 There are two main reasons that make prefetching power
                 inefficient. First, compiler-inserted prefetch
                 instructions increase code size and, therefore, could
                 increase I-cache power. Second, inaccurate prefetching
                 (especially for hardware prefetching) leads to high
                 D-cache power consumption because of useless accesses.
                 In this work, we show that it is possible to support
                 power-efficient prefetching through bit-differential
                 offset assignment. We target the prefetching of
                 relocatable stack variables with a high degree of
                 precision. By assigning the offsets of stack variables
                 in such a way that most consecutive addresses differ by
                 1 bit, we can prefetch them with compact prefetch
                 instructions to save I-cache power. The compiler first
                 generates an access graph of consecutive memory
                 references and then attempts a layout of the memory
                 locations in the smallest hypercube. Each dimension of
                 the hypercube represents a 1-bit differential
                 addressing. The embedding is carried out in as compact
                 a hypercube as possible in order to save memory space.
                 Each load/store instruction carries a hint regarding
                 prefetching the next memory reference by encoding its
                 differential address with respect to the current one.
                 To reduce D-cache power cost, we further attempt to
                 assign offsets so that most of the consecutive accesses
                 map to the same cache line. Our prefetching is done
                 using a one-entry line buffer [Wilson et al. 1996].
                 Consequently, many look-ups in D-cache reduce to
                 incremental ones. This results in D-cache activity
                 reduction and power savings. Our prefetcher requires
                 both compiler and hardware support. In this paper, we
                 provide implementation on the processor model close to
                 ARM with small modification to the ISA. We tackle
                 issues such as out-of-order commit, predication, and
                 speculation through simple modifications to the
                 processor pipeline on noncritical paths. Our goal in
                 this work is to boost performance while
                 maintaining/lowering power consumption. Our results
                 show 12\% speedup and slight power reduction. The
                 runtime virtual space loss for stack and static data is
                 about 11.8\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "bit-differential addressing; data prefetching;
                 embedded processors; offset assignment",
}

@Article{Contreras:2007:XPP,
  author =       "Gilberto Contreras and Margaret Martonosi and Jinzhang
                 Peng and Guei-Yuan Lueh and Roy Ju",
  title =        "The {XTREM} power and performance simulator for the
                 {Intel XScale} core: {Design} and experiences",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210272",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Managing power concerns in microprocessors has become
                 a pressing research problem across the domains of
                 computer architecture, CAD, and compilers. As a result,
                 several parameterized cycle-level power simulators have
                 been introduced. While these simulators can be quite
                 useful for microarchitectural studies, their generality
                 limits how accurate they can be for any one chip
                 family. Furthermore, their hardware focus means that
                 they do not explicitly enable studying the interaction
                 of different software layers, such as Java applications
                 and their underlying runtime system software. This
                 paper describes and evaluates XTREM, a power-simulation
                 tool tailored for the Intel XScale microarchitecture.
                 In building XTREM, our goals were to develop a
                 microarchitecture simulator that, while still offering
                 size parameterizations for cache and other structures,
                 more accurately reflected a realistic processor
                 pipeline. We present a detailed set of validations
                 based on multimeter power measurements and hardware
                 performance counter sampling. XTREM exhibits an average
                 performance error of only 6.5\% and an even smaller
                 average power error: 4\%. The paper goes on to present
                 an application study enabled by the simulator. Namely,
                 we use XTREM to produce an energy consumption breakdown
                 for Java CDC and CLDC applications. Our simulator
                 measurements indicate that a large percentage of the
                 total energy consumption (up to 35\%) is devoted to the
                 virtual machine's support functions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Intel XScale technology; Java; power measurements;
                 power modeling",
}

@Article{DeSutter:2007:LTC,
  author =       "Bjorn {De Sutter} and Ludo {Van Put} and Dominique
                 Chanet and Bruno {De Bus} and Koen {De Bosschere}",
  title =        "Link-time compaction and optimization of {ARM}
                 executables",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210273",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The overhead in terms of code size, power consumption,
                 and execution time caused by the use of precompiled
                 libraries and separate compilation is often
                 unacceptable in the embedded world, where real-time
                 constraints, battery life-time, and production costs
                 are of critical importance. In this paper, we present
                 our link-time optimizer for the ARM architecture. We
                 discuss how we can deal with the peculiarities of the
                 ARM architecture related to its visible program counter
                 and how the introduced overhead can to a large extent
                 be eliminated. Our link-time optimizer is evaluated
                 with four tool chains, two proprietary ones from ARM
                 and two open ones based on GNU GCC. When used with
                 proprietary tool chains from ARM Ltd., our link-time
                 optimizer achieved average code size reductions of 16.0
                 and 18.5\%, while the programs have become 12.8 and
                 12.3\% faster, and 10.7 and 10.1\% more energy
                 efficient. Finally, we show how the incorporation of
                 link-time optimization in tool chains may influence
                 library interface design.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "compaction; linker; optimization; performance",
}

@Article{Panainte:2007:MCR,
  author =       "Elena Moscu Panainte and Koen Bertels and Stamatis
                 Vassiliadis",
  title =        "The {Molen} compiler for reconfigurable processors",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210274",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we describe the compiler developed to
                 target the Molen reconfigurable processor and
                 programming paradigm. The compiler automatically
                 generates optimized binary code for C applications,
                 based on pragma annotation of the code executed on the
                 reconfigurable hardware. For the IBM PowerPC 405
                 processor included in the Virtex II Pro platform FPGA,
                 we implemented code generation, register, and stack
                 frame allocation following the PowerPC EABI (embedded
                 application binary interface). The PowerPC backend has
                 been extended to generate the appropriate instructions
                 for the reconfigurable hardware and data transfer,
                 taking into account the information of the specific
                 hardware implementations and system. Starting with an
                 annotated C application, a complete design flow has
                 been integrated to generate the executable bitstream
                 for the reconfigurable processor. The flexible design
                 of the proposed infrastructure allows considering the
                 special features of the reconfigurable architectures.
                 In order to hide the reconfiguration latencies, we
                 implemented an instruction-scheduling algorithm for the
                 dynamic hardware configuration instructions. The
                 algorithm schedules, in advance, the hardware
                 configuration instructions, taking into account the
                 conflicts for the reconfigurable hardware resources
                 (FPGA area) between the hardware operations. To verify
                 the Molen compiler, we used the multimedia video frame
                 M-JPEG encoder of which the extended discrete cosine
                 transform (DCT*) function was mapped on the FPGA. We
                 obtained an overall speedup of 2.5 (about 84\%
                 efficiency over the maximal theoretical speedup of
                 2.96). The performance efficiency is achieved using
                 automatically generated nonoptimized DCT* hardware
                 implementation. The instruction-scheduling algorithm
                 has been tested for DCT, quantization, and VLC
                 operations. Based on simulation results, we determine
                 that, while a simple scheduling produces a significant
                 performance decrease, our proposed scheduling
                 contributes up to a $ 16 \times $ M-JPEG encoder
                 speedup.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "FPGA; instruction scheduling; reconfigurable
                 computing",
}

@Article{Tan:2007:TAP,
  author =       "Yudong Tan and Vincent Mooney",
  title =        "Timing analysis for preemptive multitasking real-time
                 systems with caches",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1210268.1210275",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we propose an approach to estimate the
                 worst-case response time (WCRT) of each task in a
                 preemptive multitasking single-processor real-time
                 system utilizing an L1 cache. The approach combines
                 intertask cache-eviction analysis and intratask
                 cache-access analysis to estimate the number of cache
                 lines that can possibly be evicted by the preempting
                 task and also be accessed again by the preempted task
                 after preemptions (thus requiring the preempted task to
                 reload the cache line(s)). This cache-reload delay
                 caused by preempting task(s) is then incorporated into
                 WCRT analysis. Three sets of applications with up to
                 six concurrent tasks running are used to test our
                 approach. The experimental results show that our
                 approach can tighten the WCRT estimate by up to 32\% ($
                 1.4 \times $) over prior state-of-the-art.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "real-time; worst-case response time",
}

@article{Ratschan:2007:SVH,
  author =       {Stefan Ratschan and Zhikun She},
  title =        {Safety verification of hybrid systems by constraint
                 propagation-based abstraction refinement},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       1,
  articleno =    8,
  pages =        {8:1--8:??},
  month =        feb,
  year =         2007,
  doi =          {https://doi.org/10.1145/1210268.1210276},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {constraint propagation; hybrid systems; intervals},
  abstract =     {This paper deals with the problem of safety
                 verification of nonlinear hybrid systems. We start from
                 a classical method that uses interval arithmetic to
                 check whether trajectories can move over the boundaries
                 in a rectangular grid. We put this method into an
                 abstraction refinement framework and improve it by
                 developing an additional refinement step that employs
                 interval-constraint propagation to add information to
                 the abstraction without introducing new grid elements.
                 Moreover, the resulting method allows switching
                 conditions, initial states, and unsafe states to be
                 described by complex constraints, instead of sets that
                 correspond to grid elements. Nevertheless, the method
                 can be easily implemented, since it is based on a
                 well-defined set of constraints, on which one can run
                 any constraint propagation-based solver. Tests of such
                 an implementation are promising.},
  bibdate =      {Thu Jun 12 15:20:58 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Schepers:2007:GEI,
  author =       {Henk Schepers},
  title =        {Guest editorial: {Introduction} to the special issue
                 on software and compilers for embedded systems},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       2,
  articleno =    9,
  pages =        {9:1--9:??},
  month =        may,
  year =         2007,
  doi =          {https://doi.org/10.1145/1234675.1234676},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  bibdate =      {Thu Jun 12 15:21:17 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lee:2007:SCT,
  author =       "Sheayun Lee and Jaejin Lee and Chang Yun Park and Sang
                 Lyul Min",
  title =        "Selective code transformation for dual instruction set
                 processors",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "10:1--10:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1234675.1234677",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems are often constrained in terms of
                 both code size and execution time, because of a limited
                 amount of available memory and real-time nature of
                 applications. A dual instruction set processor, which
                 supports a reduced instruction set (16
                 bits/instruction), in addition to a full instruction
                 set (32 bits/instruction), allows an opportunity for a
                 tradeoff between these two design criteria.
                 Specifically, while the reduced instruction set can be
                 used to reduce code size by providing smaller
                 instructions, a program compiled into the reduced
                 instruction set typically runs slower than the same
                 program compiled into the full instruction set.
                 Motivated by this observation, we propose a code
                 generation technique that exploits this tradeoff
                 relationship by selectively using the two instruction
                 sets for different sections in the program. The
                 proposed technique, called selective code
                 transformation, not only provides a mechanism to enable
                 a flexible tradeoff between a program's code size and
                 its execution time, but also facilitates program
                 optimization toward enhancing its worst case
                 performance. The results from our experiments show that
                 our proposed technique can be effectively used to
                 fine-tune an application program on a spectrum of code
                 size and execution performance, which, in turn, enables
                 a system-wide optimization on memory space and
                 execution speed involving multiple applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "dual instruction set processors; mixed-width
                 instruction set architecture; reduced bit-width
                 instruction set architecture",
}

@article{Zhang:2007:RBP,
  author =       {Wei Zhang and Bramha Allu},
  title =        {Reducing branch predictor leakage energy by exploiting
                 loops},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       2,
  articleno =    11,
  pages =        {11:1--11:??},
  month =        may,
  year =         2007,
  doi =          {https://doi.org/10.1145/1234675.1234678},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {branch prediction; compiler; leakage energy},
  abstract =     {With the scaling of technology, leakage energy will
                 become the dominant source of energy consumption.
                 Besides cache memories, branch predictors are among the
                 largest on-chip array structures and consume nontrivial
                 leakage energy. This paper proposes two cost-effective
                 loop-based strategies to reduce the branch predictor
                 leakage without impacting prediction accuracy or
                 performance. The loop-based approaches exploit the fact
                 that loops usually only contain a small number of
                 instructions and, hence, even fewer branch instructions
                 while taking a significant fraction of the execution
                 time. Consequently, all the nonactive entries of branch
                 predictors can be placed into the low leakage mode
                 during the loop execution in order to reduce leakage
                 energy. Compiler and circuit supports are discussed to
                 implement the proposed leakage-reduction strategies.
                 Compared to the recently proposed decay-based approach,
                 our experimental results show that the loop-based
                 approach can extract 16.2\% more dead time of the
                 branch predictor, on average, leading to more leakage
                 energy savings without impacting the branch prediction
                 accuracy and performance.},
  bibdate =      {Thu Jun 12 15:21:17 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Scharwaechter:2007:AAE,
  author =       {Hanno Scharwaechter and David Kammler and Andreas
                 Wieferink and Manuel Hohenauer and Kingshuk Karuri and
                 Jianjiang Ceng and Rainer Leupers and Gerd Ascheid and
                 Heinrich Meyr},
  title =        {{ASIP} architecture exploration for efficient {IPSec}
                 encryption: a case study},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       2,
  articleno =    12,
  pages =        {12:1--12:??},
  month =        may,
  year =         2007,
  doi =          {https://doi.org/10.1145/1234675.1234679},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {ADL; ASIP; computer-aided design; IPSec},
  abstract =     {Application-Specific Instruction-Set Processors
                 (ASIPs) are becoming increasingly popular in the world
                 of customized, application-driven System-on-Chip (SoC)
                 designs. Efficient ASIP design requires an iterative
                 architecture exploration loop---gradual refinement of
                 the processor architecture starting from an initial
                 template. To accomplish this task, design automation
                 tools are used to detect bottlenecks in embedded
                 applications, to implement application-specific
                 processor instructions, and to automatically generate
                 the required software tools (such as instruction-set
                 simulator, C-compiler, assembler, and profiler), as
                 well as to synthesize the hardware. This paper
                 describes an architecture exploration loop for an ASIP
                 coprocessor that implements common encryption
                 functionality used in symmetric block cipher algorithms
                 for internet protocol security (IPSec). The coprocessor
                 is accessed via shared memory and, as a consequence,
                 our approach is easily adaptable to arbitrary main
                 processor architectures. This paper presents the
                 extended version of our case study that has been
                 already published on the SCOPES conference in 2004. In
                 both papers, a MIPS architecture is used as the main
                 processor and Blowfish as encryption algorithm.},
  bibdate =      {Thu Jun 12 15:21:17 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Turjan:2007:CIC,
  author =       {Alexandru Turjan and Bart Kienhuis and Ed Deprettere},
  title =        {Classifying interprocess communication in process
                 network representation of nested-loop programs},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       2,
  articleno =    13,
  pages =        {13:1--13:??},
  month =        may,
  year =         2007,
  doi =          {https://doi.org/10.1145/1234675.1234680},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {hybrid classification approach; integer linear
                 programming; matrix manipulations; static analysis},
  abstract =     {New embedded signal-processing architectures are
                 emerging that are composed of loosely coupled
                 heterogeneous components like CPUs or DSPs, specialized
                 IP cores, reconfigurable units, or memories. We believe
                 that these architectures should be programmed using the
                 process network model of computation. To ease the
                 mapping of applications, we are developing the Compaan
                 compiler that automatically derives a process network
                 (PN) description from an application written in Matlab
                 or C. In this paper, we investigate a particular
                 problem in nested loop programs, which is about
                 classifying the interprocess communication in the PN
                 representation of the nested loop program. The global
                 memory arrays present in the code have to be replaced
                 by a distributed communication structure used for
                 communicating data between the network processes. We
                 show that four types of communication exist, each
                 exhibiting different requirements when realizing them
                 in hardware or software. We first present two compile
                 time tests that are based on integer linear programming
                 to decide the type of the communication. In the second
                 part of this paper, we present alternative
                 classification techniques that have polynomial
                 complexity. However, in some cases, those techniques do
                 not give a definitive answer and the ILP tests have to
                 be applied. All present tests are combined in a hybrid
                 classification scheme that correctly classifies the
                 interprocess communication. In only 5\% of the cases to
                 classify, we have to rely on integer linear programming
                 while, in the remaining 95\%, the alternative
                 techniques presented in this paper are able to
                 correctly classify each case. The hybrid classification
                 scheme has become an important part of our Compaan
                 compiler.},
  bibdate =      {Thu Jun 12 15:21:17 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Ko:2007:BSA,
  author =       {Ming-Yung Ko and Praveen K. Murthy and Shuvra S.
                 Bhattacharyya},
  title =        {Beyond single-appearance schedules: {Efficient DSP}
                 software synthesis using nested procedure calls},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       2,
  articleno =    14,
  pages =        {14:1--14:??},
  month =        may,
  year =         2007,
  doi =          {https://doi.org/10.1145/1234675.1234681},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {block diagram compiler; design methodology; embedded
                 systems; hierarchical graph decomposition; memory
                 optimization; procedural implementation; synchronous
                 dataflow},
  abstract =     {Synthesis of digital signal-processing (DSP) software
                 from dataflow-based formal models is an effective
                 approach for tackling the complexity of modern DSP
                 applications. In this paper, an efficient method is
                 proposed for applying subroutine call instantiation of
                 module functionality when synthesizing embedded
                 software from a dataflow specification. The technique
                 is based on a novel recursive decomposition of
                 subgraphs in a cluster hierarchy that is optimized for
                 low buffer size. Applying this technique, one can
                 achieve significantly lower buffer sizes than what is
                 available for minimum code size inlined schedules,
                 which have been the emphasis of prior work on software
                 synthesis. Furthermore, it is guaranteed that the
                 number of procedure calls in the synthesized program is
                 polynomially bounded in the size of the input dataflow
                 graph, even though the number of module invocations may
                 increase exponentially. This recursive decomposition
                 approach provides an efficient means for integrating
                 subroutine-based module instantiation into the design
                 space of DSP software synthesis. The experimental
                 results demonstrate a significant improvement in buffer
                 cost, especially for more irregular multirate DSP
                 applications, with moderate code and execution time
                 overhead.},
  bibdate =      {Thu Jun 12 15:21:17 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Hua:2007:PDM,
  author =       {Shaoxiong Hua and Gang Qu and Shuvra S.
                 Bhattacharyya},
  title =        {Probabilistic design of multimedia embedded systems},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       3,
  articleno =    15,
  pages =        {15:1--15:??},
  month =        jul,
  year =         2007,
  doi =          {https://doi.org/10.1145/1275986.1275987},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {completion ratio; energy minimization;
                 hardware/software codesign; multiple voltage;
                 probabilistic design; soft real-time system},
  abstract =     {In this paper, we propose the novel concept of
                 probabilistic design for multimedia embedded systems,
                 which is motivated by the challenge of how to design,
                 but not overdesign, such systems while systematically
                 incorporating performance requirements of multimedia
                 application, uncertainties in execution time, and
                 tolerance for reasonable execution failures. Unlike
                 most present techniques that are based on either worst-
                 or average-case execution times of application tasks,
                 where the former guarantees the completion of each
                 execution, but often leads to overdesigned systems, and
                 the latter fails to provide any completion guarantees,
                 the proposed probabilistic design method takes
                 advantage of unique features mentioned above of
                 multimedia systems to relax the rigid hardware
                 requirements for software implementation and avoid
                 overdesigning the system. In essence, this relaxation
                 expands the design space and we further develop an
                 off-line on-line minimum effort algorithm for quick
                 exploration of the enlarged design space at early
                 design stages. This is the first step toward our goal
                 of bridging the gap between real-time analysis and
                 embedded software implementation for rapid and economic
                 multimedia system design. It is our belief that the
                 proposed method has great potential in reducing system
                 resource while meeting performance requirements. The
                 experimental results confirm this as we achieve
                 significant saving in system's energy consumption to
                 provide a statistical completion ratio guarantee (i.e.,
                 the expected number of completions over a large number
                 of iterations is greater than a given value).},
  bibdate =      {Thu Jun 12 15:49:41 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Koushanfar:2007:TMC,
  author =       "Farinaz Koushanfar and Abhijit Davare and David T.
                 Nguyen and Alberto Sangiovanni-Vincentelli and Miodrag
                 Potkonjak",
  title =        "Techniques for maintaining connectivity in wireless
                 ad-hoc networks under energy constraints",
  journal =      j-TECS,
  volume =       "6",
  number =       "3",
  pages =        "16:1--16:??",
  month =        jul,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1275986.1275988",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:49:41 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Distributed wireless systems (DWSs) are emerging as
                 the enabler for next-generation wireless applications.
                 There is a consensus that DWS-based applications, such
                 as pervasive computing, sensor networks, wireless
                 information networks, and speech and data communication
                 networks, will form the backbone of the next
                 technological revolution. Simultaneously, with great
                 economic, industrial, consumer, and scientific
                 potential, DWSs pose numerous technical challenges.
                 Among them, two are widely considered as crucial:
                 autonomous localized operation and minimization of
                 energy consumption. We address the fundamental problem
                 of how to maximize the lifetime of the network using
                 only local information, while preserving network
                 connectivity. We start by introducing the care-free
                 sleep (CS) Theorem that provides provably optimal
                 conditions for a node to go into sleep mode while
                 ensuring that global connectivity is not affected. The
                 CS theorem is the basis for an efficient localized
                 algorithm that decides which nodes will go into
                 sleep mode and for how long. We have also developed
                 mechanisms for collecting neighborhood information and
                 for the coordination of distributed energy minimization
                 protocols. The effectiveness of the approach is
                 demonstrated using a comprehensive study of the
                 performance of the algorithm over a wide range of
                 network parameters. Another important highlight is the
                 first mathematical and Monte Carlo analysis that
                 establishes the importance of considering nodes within
                 a small number of hops in order to preserve energy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "ad-hoc networks; connectivity; energy management; low
                 power; power management; sleeping coordination",
}

@article{Wagner:2007:HSI,
  author =       {Fl{\'a}vio R. Wagner and Wander Ces{\'a}rio and Ahmed
                 A. Jerraya},
  title =        {Hardware\slash software {IP} integration using the
                 {ROSES} design environment},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       3,
  articleno =    17,
  pages =        {17:1--17:??},
  month =        jul,
  year =         2007,
  doi =          {https://doi.org/10.1145/1275986.1275989},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {IP integration; systems-on-chip},
  abstract =     {Considering current time-to-market pressures, IP reuse
                 is mandatory for the design of complex embedded
                 systems-on-chip (SoC). The integration of IP components
                 into a given design is the most complex task in the
                 whole reuse process. This paper describes the IP
                 integration approach implemented in the ROSES design
                 environment, which presents a unique combination of
                 features that enhance IP reuse: automatic assembly of
                 interfaces between heterogeneous software and hardware
                 IP components; easy adaptation to different on-chip
                 communication structures and bus and core standards;
                 generation of customized and minimal OSs for
                 programmable components; and an
                 architecture-independent high-level API embedded into
                 SystemC that makes application software independent
                 from system implementation. Application code is written
                 by using communication functions available in this API.
                 ROSES automatically assembles wrappers that implement
                 these functions, such that the application code does
                 not need to be modified in order to run in the final
                 synthesized system.},
  bibdate =      {Thu Jun 12 15:49:41 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Lee:2007:LBB,
  author =       {Sang-Won Lee and Dong-Joo Park and Tae-Sun Chung and
                 Dong-Ho Lee and Sangwon Park and Ha-Joo Song},
  title =        {A log buffer-based flash translation layer using
                 fully-associative sector translation},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       3,
  articleno =    18,
  pages =        {18:1--18:??},
  month =        jul,
  year =         2007,
  doi =          {https://doi.org/10.1145/1275986.1275990},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {address translation; associative mapping; flash
                 memory; FTL; log blocks},
  abstract =     {Flash memory is being rapidly deployed as data storage
                 for mobile devices such as PDAs, MP3 players, mobile
                 phones, and digital cameras, mainly because of its low
                 electronic power, nonvolatile storage, high
                 performance, physical stability, and portability. One
                 disadvantage of flash memory is that prewritten data
                 cannot be dynamically overwritten. Before overwriting
                 prewritten data, a time-consuming erase operation on
                 the used blocks must precede, which significantly
                 degrades the overall write performance of flash memory.
                 In order to solve this ``erase-before-write'' problem,
                 the flash memory controller can be integrated with a
                 software module, called ``flash translation layer
                 (FTL).'' Among many FTL schemes available, the log
                 block buffer scheme is considered to be optimum. With
                 this scheme, a small number of log blocks, a kind of
                 write buffer, can improve the performance of write
                 operations by reducing the number of erase operations.
                 However, this scheme can suffer from low space
                 utilization of log blocks. In this paper, we show that
                 there is much room for performance improvement in the
                 log buffer block scheme, and propose an enhanced log
                 block buffer scheme, called FAST (full associative
                 sector translation). Our FAST scheme improves the space
                 utilization of log blocks using fully-associative
                 sector translations for the log block sectors. We also
                 show empirically that our FAST scheme outperforms the
                 pure log block buffer scheme.},
  bibdate =      {Thu Jun 12 15:49:41 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Wu:2007:EBT,
  author =       {Chin-Hsien Wu and Tei-Wei Kuo and Li Ping Chang},
  title =        {An efficient {B-tree} layer implementation for
                 flash-memory storage systems},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-url =  {https://dl.acm.org/loi/tecs},
  volume =       6,
  number =       3,
  articleno =    19,
  pages =        {19:1--19:??},
  month =        jul,
  year =         2007,
  doi =          {https://doi.org/10.1145/1275986.1275991},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {B-tree; database systems; embedded systems; flash
                 memory; storage systems},
  abstract =     {With the significant growth of the markets for
                 consumer electronics and various embedded systems,
                 flash memory is now an economic solution for storage
                 systems design. Because index structures require
                 intensively fine-grained updates/modifications,
                 block-oriented access over flash memory could introduce
                 a significant number of redundant writes. This might
                 not only severely degrade the overall performance, but
                 also damage the reliability of flash memory. In this
                 paper, we propose a very different approach, which can
                 efficiently handle fine-grained updates/modifications
                 caused by B-tree index access over flash memory. The
                 implementation is done directly over the flash
                 translation layer (FTL); hence, no modifications to
                 existing application systems are needed. We demonstrate
                 that when index structures are adopted over flash
                 memory, the proposed methodology can significantly
                 improve the system performance and, at the same time,
                 reduce both the overhead of flash-memory management and
                 the energy dissipation. The average response time of
                 record insertions and deletions was also significantly
                 reduced.},
  bibdate =      {Thu Jun 12 15:49:41 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Xie:2007:ISP,
  author          = {Tao Xie and Xiao Qin},
  title           = {Improving security for periodic tasks in embedded systems
                     through scheduling},
  journal         = j-TECS,
  volume          = {6},
  number          = {3},
  pages           = {20:1--20:??},
  month           = jul,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1275986.1275992},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:49:41 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {While many scheduling algorithms for periodic tasks ignore
                     security requirements posed by sensitive applications and are,
                     consequently, unable to perform properly in embedded systems
                     with security constraints, in this paper, we present an
                     approach to scheduling periodic tasks in embedded systems
                     subject to security and timing constraints. We design a
                     necessary and sufficient feasibility check for a set of
                     periodic tasks with security requirements. With the
                     feasibility test in place, we propose a scheduling algorithm,
                     or SASES (security-aware scheduling for embedded systems),
                     which accounts for both security and timing requirements.
                     SASES judiciously distributes slack times among a variety of
                     security services for a set of periodic tasks, thereby
                     optimizing security for embedded systems without sacrificing
                     schedulability. To demonstrate the effectiveness of SASES, we
                     apply the proposed SASES to real-world embedded systems such
                     as an automated flight control system. We show, through
                     extensive simulations, that SASES is able to maximize security
                     for embedded systems while guaranteeing timeliness. In
                     particular, SASES significantly improves security over three
                     baseline algorithms by up to 107\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {embedded systems; periodic tasks; real-time systems;
                     scheduling; security-sensitive applications},
}

@Article{Gupta:2007:ISL,
  author          = {Rajiv Gupta and Yunheung Paek},
  title           = {Introduction to the special {LCTES'05} issue},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {21:1--21:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274859},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {21},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Gay:2007:SDP,
  author          = {David Gay and Philip Levis and David Culler},
  title           = {Software design patterns for {TinyOS}},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {22:1--22:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274860},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We present design patterns used by software components in the
                     TinyOS sensor network operating system. They differ
                     significantly from traditional software design patterns because
                     of the constraints of sensor networks and to TinyOS's focus on
                     static allocation and whole-program composition. We describe
                     how nesC has evolved to support these design patterns by
                     including a few simple language primitives and optimizations.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {design patterns; embedded systems; nesC; TinyOS},
}

@Article{Chanet:2007:ARM,
  author          = {Dominique Chanet and Bjorn {De Sutter} and Bruno {De Bus} and
                     Ludo {Van Put} and Koen {De Bosschere}},
  title           = {Automated reduction of the memory footprint of the {Linux}
                     kernel},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {23:1--23:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274861},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The limited built-in configurability of Linux can lead to
                     expensive code size overhead when it is used in the embedded
                     market. To overcome this problem, we propose the application of
                     link-time compaction and specialization techniques that exploit
                     the a priori known, fixed runtime environment of many embedded
                     systems. In experimental setups based on the ARM XScale and
                     i386 platforms, the proposed techniques are able to reduce the
                     kernel memory footprint with over 16\%. We also show how
                     relatively simple additions to existing binary rewriters can
                     implement the proposed techniques for a complex, very
                     unconventional program, such as the Linux kernel. We note that
                     even after specialization, a lot of seemingly unnecessary code
                     remains in the kernel and propose to reduce the footprint of
                     this code by applying code-compression techniques. This
                     technique, combined with the previous ones, reduces the memory
                     footprint with over 23\% for the i386 platform and 28\% for the
                     ARM platform. Finally, we pinpoint an important code size
                     growth problem when compaction and compression techniques are
                     combined on the ARM platform.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {23},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {compaction; compression; Linux kernel; operating system;
                     specialization; system calls},
}

@Article{Sassone:2007:SSS,
  author =       "Peter G. Sassone and D. Scott Wills and Gabriel H.
                 Loh",
  title =        "Static strands: {Safely} exposing dependence chains
                 for increasing embedded power efficiency",
  journal =      j-TECS,
  volume =       "6",
  number =       "4",
  pages =        "24:1--24:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1274858.1274862",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modern embedded processors are designed to maximize
                 execution efficiency---the amount of performance
                 achieved per unit of energy dissipated while meeting
                 minimum performance levels. To increase this
                 efficiency, we propose utilizing static strands,
                 dependence chains without fan-out, which are exposed by
                 a compiler pass. These dependent instructions are
                 resequenced to be sequential and annotated to
                 communicate their location to the hardware.
                 Importantly, this modified application is binary
                 compatible and functionally identical to the original,
                 allowing transparent execution on a baseline processor.
                 However, these static strands can be easily collapsed
                 and optimized by simple processor modifications,
                 significantly reducing the workload energy. Results
                 show that over 30\% of MediaBench and Spec2000int
                 dynamic instructions can be collapsed, reducing issue
                 logic energy by 20\%, bypass energy 19\%, and register
                 file energy 14\%. In addition, by increasing the
                 effective capacity of pipeline resources by almost a
                 third, average IPC can be improved up to 15\%. This
                 performance gain can then be traded in for a lower
                 clock frequency to maintain a baseline level of
                 performance, further reducing energy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "architecture; dependency collapsing; energy;
                 sequentiality",
}

@Article{Staschulat:2007:SPC,
  author          = {Jan Staschulat and Rolf Ernst},
  title           = {Scalable precision cache analysis for real-time software},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {25:1--25:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274863},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Caches are needed to increase the processor performance, but
                     the temporal behavior is difficult to predict, especially in
                     embedded systems with preemptive scheduling. Current approaches
                     use simplified assumptions or propose complex analysis
                     algorithms to bound the cache-related preemption delay. In this
                     paper, a scalable preemption delay analysis for associative
                     instruction caches to control the analysis precision and the
                     time-complexity is proposed. An accurate preemption delay
                     calculation is integrated into a cache-aware schedulability
                     analysis. The framework is evaluated in several experiments.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {25},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {cache; embedded systems; preemptive scheduling; worst-case
                     execution time analysis},
}

@Article{Varma:2007:AFS,
  author =       "Ankush Varma and Bruce Jacob and Eric Debes and Igor
                 Kozintsev and Paul Klein",
  title =        "Accurate and fast system-level power modeling: an
                 {XScale}-based case study",
  journal =      j-TECS,
  volume =       "6",
  number =       "4",
  pages =        "26:1--26:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1274858.1274864",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Accurate and fast system modeling is central to the
                 rapid design space exploration needed for
                 embedded-system design. With fast, complex SoCs playing
                 a central role in such systems, system designers have
                 come to require MIPS-range simulation speeds and
                 near-cycle accuracy. The sophisticated simulation
                 frameworks that have been developed for high-speed
                 system performance modeling do not address power
                 consumption, although it is a key design constraint. In
                 this paper, we define a simulation-based methodology
                 for extending system performance-modeling frameworks to
                 also include power modeling. We demonstrate the use of
                 this methodology with a case study of a real, complex
                 embedded system, comprising the Intel XScale{\reg}
                 embedded microprocessor, its WMMX{\trademark} SIMD
                 coprocessor, L1 caches, SDRAM and the on-board address
                 and data buses. We describe detailed power models for
                 each of these components and validate them against
                 physical measurements from hardware, demonstrating that
                 such frameworks enable designers to model both power
                 and performance at high speeds without sacrificing
                 accuracy. Our results indicate that the power estimates
                 obtained are accurate within 5\% of physical
                 measurements from hardware, while simulation speeds
                 consistently exceed a million instructions per second
                 (MIPS).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "embedded systems; power modeling; SystemC",
}

@Article{Carta:2007:CTA,
  author          = {Salvatore Carta and Andrea Alimonda and Alessandro Pisano and
                     Andrea Acquaviva and Luca Benini},
  title           = {A control theoretic approach to energy-efficient pipelined
                     computation in {MPSoCs}},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {27:1--27:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274865},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this work, we describe a control theoretic approach to
                     dynamic voltage/frequency scaling (DVFS) in a pipelined MPSoC
                     architecture with soft real-time constraints, aimed at
                     minimizing energy consumption with throughput guarantees.
                     Theoretical analysis and experiments carried out on a
                     cycle-accurate, energy-aware, and multiprocessor simulation
                     platform are provided. We give a dynamic model of the system
                     behavior which allows to synthesize linear and nonlinear
                     feedback control schemes for the run-time adjustment of the
                     core frequencies. We study the characteristics of the proposed
                     techniques in both transient and steady-state conditions.
                     Finally, we compare the proposed feedback approaches and local
                     DVFS policies from an energy consumption viewpoint.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {27},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {DVFS; feedback-control techniques; MPSoC; parallel systems},
}

@Article{Crenshaw:2007:RIE,
  author          = {Tanya L. Crenshaw and Spencer Hoke and Ajay Tirumala and
                     Marco Caccamo},
  title           = {Robust implicit {EDF}: a wireless {MAC} protocol for
                     collaborative real-time systems},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {28:1--28:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274866},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Advances in wireless technology have brought us closer to
                     extensive deployment of distributed real-time embedded systems
                     connected through a wireless channel. The medium-access control
                     (MAC) layer protocol is critical in providing a real-time
                     guarantee. We have devised a real-time wireless MAC protocol,
                     robust implicit earliest deadline first, or RI-EDF. Packets are
                     transmitted according to EDF scheduling rules, offering a
                     protocol that implicitly avoids contention. In the event of a
                     packet loss or a node failure, every node has the opportunity
                     to recover the schedule based on a static recovery priority,
                     offering a protocol that is robust with no central point of
                     failure. We demonstrate in simulations that RI-EDF provides
                     better goodput and lower packet loss than existing protocols
                     like 802.11 PCF and EDCF. In our implementation and distributed
                     control test-bed, we show that RI-EDF provides better
                     throughput than the TinyOS MAC-layer protocol. Overall, RI-EDF
                     provides predictable temporal behavior with minimal impact on
                     node failures, packet losses, and noise in the channel.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {28},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {earliest deadline first; medium-access control; real time;
                     wireless},
}

@Article{Quan:2007:EED,
  author          = {Gang Quan and Xiaobo Sharon Hu},
  title           = {Energy efficient {DVS} schedule for fixed-priority real-time
                     systems},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {29:1--29:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274867},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Energy consumption has become an increasingly important
                     consideration in designing many real-time embedded systems.
                     Variable voltage processors, if used properly, can dramatically
                     reduce such system energy consumption. In this paper, we
                     present a technique to determine voltage settings for a
                     variable voltage processor that utilizes a fixed-priority
                     assignment to schedule jobs. By exploiting more efficiently the
                     processor slack time, our approach can be more effective in
                     reducing the execution speed for real-time tasks when
                     necessary. Our approach also produces the minimum constant
                     voltage needed to feasibly schedule the entire job set. With
                     both randomly generated and practical examples, our heuristic
                     approach can achieve the dynamic energy reduction very close to
                     the theoretically optimal one (within 2\%) with much less
                     computation cost.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {29},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {dynamic voltage scaling; fixed-priority scheduling; low power;
                     real time},
}

@Article{Rao:2007:EOS,
  author          = {Ravishankar Rao and Sarma Vrudhula},
  title           = {Energy optimal speed control of a producer--consumer device
                     pair},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {30:1--30:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274868},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We propose a modular approach for minimizing the total energy
                     consumed by a pair of generic communicating devices
                     (producer--consumer scenario) by jointly controlling their
                     speed profiles. Each device (like a CPU, or disk drive) is
                     assumed to have a controllable variable called its speed (e.g.,
                     a CPU's clock frequency, a disk drive's spindle motor speed)
                     that affects its power consumption and performance (e.g.,
                     throughput, data transfer rate). The device and task models we
                     analyzed were inspired by applications like CD recording (hard
                     drive to CD drive data transfer) and data processing (disk
                     drive to CPU data transfer). The proposed solution can be used
                     for any pair of devices with convex (for continuous speed sets)
                     or W-convex (a discrete version of a convex function for
                     discrete speed sets) power--speed relationships. For discrete
                     speed sets, the method operates directly on the power--speed
                     values and does not require an analytical relationship between
                     power and speed. The key to solving the two-device optimization
                     problem was the observation that it could be split into two
                     single device parametric optimization problems, where the
                     parameters correspond to the common task that both the devices
                     must execute. The following divide-and-conquer approach is
                     proposed: [divide] the optimal speed policy and energy
                     consumption of each device is derived as an analytical function
                     of its task parameters; [conquer] the optimal values of these
                     parameters are found by minimizing the sum of the parameterized
                     energy functions and plugged back into the parameterized speed
                     profiles. The main advantage of this approach is that each
                     device can be characterized independently and this allows
                     system designers to mix and match manufacturer-supplied device
                     energy curves to evaluate and optimize different application
                     scenarios. We demonstrate our approach using three device
                     characterization examples (for a CD drive, hard drive, and a
                     CPU) and two application scenarios (CD recording, MD5 checksum
                     computation).},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {30},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {disk drive; energy optimization; joint optimization;
                     processor; speed control},
}

@Article{Loghi:2007:PMM,
  author          = {Mirko Loghi and Luca Benini and Massimo Poncino},
  title           = {Power macromodeling of {MPSoC} message passing primitives},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {31:1--31:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274869},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Estimating the energy consumption of software in multiprocessor
                     systems-on-chip (MPSoCs) is crucial for enabling quick
                     evaluations of both software and hardware optimizations.
                     However, high-level estimations should be applicable at
                     software level, possibly constructing effective power models
                     depending on parameters that can be extracted directly from the
                     application characteristics. We propose a methodology for
                     accurate analysis of power consumption of message-passing
                     primitives in a MPSoC, and, in particular, an energy model
                     which, in spite of its simplicity, allows to model the
                     traffic-dependent nature of energy consumption through the use
                     of a single, abstract parameter, namely, the size of the
                     message exchanged.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {31},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {communication primitives; macromodeling; multiprocessor;
                     system-on-chip},
}

@Article{Kansal:2007:PME,
  author          = {Aman Kansal and Jason Hsu and Sadaf Zahedi and Mani B.
                     Srivastava},
  title           = {Power management in energy harvesting sensor networks},
  journal         = j-TECS,
  volume          = {6},
  number          = {4},
  pages           = {32:1--32:??},
  month           = sep,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1274858.1274870},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:21:30 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Power management is an important concern in sensor networks,
                     because a tethered energy infrastructure is usually not
                     available and an obvious concern is to use the available
                     battery energy efficiently. However, in some of the sensor
                     networking applications, an additional facility is available to
                     ameliorate the energy problem: harvesting energy from the
                     environment. Certain considerations in using an energy
                     harvesting source are fundamentally different from that in
                     using a battery, because, rather than a limit on the maximum
                     energy, it has a limit on the maximum rate at which the energy
                     can be used. Further, the harvested energy availability
                     typically varies with time in a nondeterministic manner. While
                     a deterministic metric, such as residual battery, suffices to
                     characterize the energy availability in the case of batteries,
                     a more sophisticated characterization may be required for a
                     harvesting source. Another issue that becomes important in
                     networked systems with multiple harvesting nodes is that
                     different nodes may have different harvesting opportunity. In a
                     distributed application, the same end-user performance may be
                     achieved using different workload allocations, and resultant
                     energy consumptions at multiple nodes. In this case, it is
                     important to align the workload allocation with the energy
                     availability at the harvesting nodes. We consider the above
                     issues in power management for energy-harvesting sensor
                     networks. We develop abstractions to characterize the complex
                     time varying nature of such sources with analytically tractable
                     models and use them to address key design issues. We also
                     develop distributed methods to efficiently use harvested energy
                     and test these both in simulation and experimentally on an
                     energy-harvesting sensor network, prototyped for this work.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {32},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {adaptive duty cycling; energy neutrality; Heliomote; lifetime;
                     power management},
}

@Article{Bueno:2007:RRP,
  author =       "David Bueno and Chris Conger and Alan D. George and
                 Ian Troxel and Adam Leko",
  title =        "{RapidIO} for radar processing in advanced space
                 systems",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "1:1--1:38",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324969.1324970",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Space-based radar is a suite of applications that
                 presents many unique system design challenges. In this
                 paper, we investigate use of RapidIO, a new
                 high-performance embedded systems interconnect, in
                 addressing issues associated with the high network
                 bandwidth requirements of real-time ground moving
                 target indicator (GMTI), and synthetic aperture Radar
                 (SAR) applications in satellite systems. Using
                 validated simulation, we study several critical issues
                 related to the RapidIO network and algorithms under
                 study. The results show that RapidIO is a promising
                 platform for space-based radar using emerging
                 technology, providing network bandwidth to enable
                 parallel computation previously unattainable in an
                 embedded satellite system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "ground-moving target indicator; RapidIO; space-based
                 radar; synthetic aperture radar",
}

@Article{Fei:2007:EOS,
  author =       "Yunsi Fei and Srivaths Ravi and Anand Raghunathan and
                 Niraj K. Jha",
  title =        "Energy-optimizing source code transformations for
                 operating system-driven embedded software",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "2:1--2:26",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324969.1324971",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper proposes four types of source code
                 transformations for operating system (OS)-driven
                 embedded software programs to reduce their energy
                 consumption. Their key features include spanning of
                 process boundaries and minimization of the energy
                 consumed in the execution of OS
                 services---opportunities which are beyond the reach of
                 conventional compiler optimizations and source code
                 transformations. We have applied the proposed
                 transformations to several multiprocess benchmark
                 programs in the context of an embedded Linux OS running
                 on an Intel StrongARM processor. They achieve up to
                 37.9\% (23.8\%, on average) energy reduction compared
                 to highly compiler-optimized implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "energy consumption; Linux; source code
                 transformations",
}

@Article{Zhu:2007:ESA,
  author =       "Yifan Zhu and Frank Mueller",
  title =        "Exploiting synchronous and asynchronous {DVS} for
                 feedback {EDF} scheduling on an embedded platform",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "3:1--3:26",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324969.1324972",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Contemporary processors support dynamic voltage
                 scaling (DVS) to reduce power consumption by varying
                 processor voltage/frequency dynamically. We develop
                 power-aware feedback--DVS algorithms for hard real-time
                 systems that adapt to dynamically changing workloads.
                 The algorithms lower execution speed while guaranteeing
                 timing constraints. We study energy consumption for
                 synchronous and asynchronous DVS switching on a PowerPC
                 board. Energy, measured via data acquisition, is
                 reduced up to 70\% over na{\"\i}ve DVS for our feedback
                 scheme with 24\% peak savings over previous algorithms.
                 These results, albeit differing in quantity, confirm
                 trends observed under simulation. They are the first of
                 their kind on an embedded board.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "dynamic voltage scaling; feedback control; real-time
                 systems; scheduling",
}

@Article{Vera:2007:DCL,
  author =       "Xavier Vera and Bj{\"o}rn Lisper and Jingling Xue",
  title =        "Data cache locking for tight timing calculations",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "4:1--4:38",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324969.1324973",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Caches have become increasingly important with the
                 widening gap between main memory and processor speeds.
                 Small and fast cache memories are designed to bridge
                 this discrepancy. However, they are only effective when
                 programs exhibit sufficient data locality. In addition,
                 caches are a source of unpredictability, resulting in
                 programs sometimes behaving in a different way than
                 expected. Detailed information about the number of
                 cache misses and their causes allows us to predict
                 cache behavior and to detect bottlenecks. Small
                 modifications in the source code may change memory
                 patterns, thereby altering the cache behavior. Code
                 transformations, which take the cache behavior into
                 account, might result in a high cache performance
                 improvement. However, cache memory behavior is very
                 hard to predict, thus making the task of optimizing and
                 timing cache behavior very difficult. This article
                 proposes and evaluates a new compiler framework that
                 times cache behavior for multitasking systems. Our
                 method explores the use of cache partitioning and
                 dynamic cache locking to provide worst-case performance
                 estimates in a safe and tight way for multitasking
                 systems. We use cache partitioning, which divides the
                 cache among tasks to eliminate intertask cache
                 interferences. We combine static cache analysis and
                 cache-locking mechanisms to ensure that all intratask
                 conflicts, and consequently, memory access times, are
                 exactly predictable. The results of our experiments
                 demonstrate the capability of our framework to describe
                 cache behavior at compile time. We compare our timing
                 approach with a system equipped with a nonpartitioned,
                 but statically, locked data cache. Our method
                 outperforms static cache locking for all analyzed task
                 sets under various cache architectures, demonstrating
                 that our fully predictable scheme does not compromise
                 the performance of the transformed programs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "data cache analysis; embedded systems; safety critical
                 systems; worst-case execution time",
}

@Article{Armbruster:2007:RTJ,
  author =       "Austin Armbruster and Jason Baker and Antonio Cunei
                 and Chapman Flack and David Holmes and Filip Pizlo and
                 Edward Pla and Marek Prochazka and Jan Vitek",
  title =        "A real-time {Java} virtual machine with applications
                 in avionics",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "5:1--5:49",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324969.1324974",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper reports on our experience with the
                 implementation of the Real-time Specification for Java
                 on the Ovm open source Java virtual machine. We
                 describe the architecture and main design decisions
                 involved in implementing real-time Java on Ovm. We
                 present the first use of Real-time Java in avionics in
                 the context of control software for a ScanEagle
                 Unmanned Aerial Vehicle.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "avionics; memory management; Real-Time Java; virtual
                 machines",
}

@Article{Mangeruca:2007:USU,
  author =       "Leonardo Mangeruca and Massimo Baleani and Alberto
                 Ferrari and Alberto Sangiovanni-Vincentelli",
  title =        "Uniprocessor scheduling under precedence constraints
                 for embedded systems design",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "6:1--6:30",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324969.1324975",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we present a novel approach to the
                 constrained scheduling problem, while addressing a more
                 general class of constraints that arise from the timing
                 requirements on real-time embedded controllers. We
                 provide general necessary and sufficient conditions for
                 scheduling under precedence constraints and derive
                 sufficient conditions for two well-known scheduling
                 policies. We define mathematical problems that provide
                 optimum priority and deadline assignments, while
                 ensuring both precedence constraints and system's
                 schedulability. We show how these problems can be
                 relaxed to corresponding integer linear programming
                 (ILP) formulations leveraging on available solvers. The
                 results are demonstrated on a real design case.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "design of embedded systems; embedded software;
                 precedence constraints; real-time scheduling",
}

@Article{Bordoloi:2007:ISA,
  author =       "Unmesh D. Bordoloi and Samarjit Chakraborty",
  title =        "Interactive schedulability analysis",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "7:1--7:27",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324969.1324976",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A typical design process for real-time embedded
                 systems involves choosing the values of certain system
                 parameters and performing a schedulability analysis to
                 determine whether all deadline constraints can be
                 satisfied. If such an analysis returns a negative
                 answer, then some of the parameters are modified and
                 the analysis is invoked once again. This iteration is
                 repeated until a schedulable design is obtained.
                 However, the schedulability analysis problem for most
                 task models is intractable (usually co-NP hard) and,
                 hence, such an iterative design process is often very
                 expensive. To get around this problem, we introduce the
                 concept of ``interactive'' schedulability analysis. It
                 is based on the observation that if only a small number
                 of system parameters are changed, then it is not
                 necessary to rerun the full schedulability analysis
                 algorithm, thereby making the iterative design process
                 considerably faster. We refer to this analysis as being
                 ``interactive'' because it is supposed to be run in an
                 interactive mode. This concept is fairly general and
                 can be applied to a wide variety of task models. In
                 this paper, we have chosen the recurring real-time task
                 model, because it can be used to represent realistic
                 applications from the embedded systems domain
                 (containing conditional branches and fine-grained
                 deadline constraints). Our experimental results show
                 that using our scheme can lead to more than
                 $ 20 \times $ speedup for each invocation of the
                 schedulability analysis algorithm, compared to the case
                 where the full algorithm is run.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "interactive design; nonfunctional constraints;
                 performance debugging; recurring real-time task model;
                 schedulability analysis",
}

@Article{Ha:2008:IES,
  author =       "Soonhoi Ha and Kiyoung Choi and Taewhan Kim and
                 Krisztian Flautner and Sanglyul Min and Wang Yi",
  title =        "Introduction to embedded systems week 2006 special
                 issue",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331332",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2008:EAC,
  author =       "Minyoung Kim and Sudarshan Banerjee and Nikil Dutt and
                 Nalini Venkatasubramanian",
  title =        "Energy-aware cosynthesis of real-time multimedia
                 applications on {MPSoCs} using heterogeneous scheduling
                 policies",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331333",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Real-time multimedia applications are increasingly
                 being mapped onto MPSoC (multiprocessor system-on-chip)
                 platforms containing hardware--software IPs
                 (intellectual property), along with a library of common
                 scheduling policies such as EDF, RM. The choice of a
                 scheduling policy for each IP is a key decision that
                 greatly affects the design's ability to meet real-time
                 constraints, and also directly affects the energy
                 consumed by the design. We present a cosynthesis
                 framework for design space exploration that considers
                 heterogeneous scheduling while mapping multimedia
                 applications onto such MPSoCs. In our approach, we
                 select a suitable scheduling policy for each IP such
                 that system energy is minimized---our framework also
                 includes energy-reduction techniques utilizing dynamic
                 power management. Experimental results on a realistic
                 multimode multimedia terminal application demonstrate
                 that our approach enables us to select design points
                 with up to 60.5\% reduced energy for a given area
                 constraint, while meeting all real-time requirements.
                 More importantly, our approach generates a tradeoff
                 space between energy and cost allowing designers to
                 comparatively evaluate multiple system level
                 mappings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "cosynthesis; energy; MPSoC; real-time scheduling",
}

@Article{Raman:2008:ASW,
  author =       "Balaji Raman and Samarjit Chakraborty",
  title =        "Application-specific workload shaping in
                 multimedia-enabled personal mobile devices",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331334",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Today, most personal mobile devices (e.g., cell phones
                 and PDAs) are multimedia-enabled and support a variety
                 of concurrently running applications, such as
                 audio/video players, word processors, and web browsers.
                 Media-processing applications are often computationally
                 expensive and most of these devices typically have
                 100--400-MHz processors. As a result, the
                 user-perceived application response times are often
                 poor when multiple applications are concurrently fired.
                 In this paper, we show that by using
                 application-specific dynamic buffering techniques, the
                 workload of these applications can be suitably
                 ``shaped'' to fit the available processor bandwidth.
                 Our techniques are analogous to traffic shaping, which
                 is widely used in communication networks to optimally
                 utilize network bandwidth. Such shaping techniques have
                 recently attracted a lot of attention in the context of
                 embedded systems design (e.g., for dynamic voltage
                 scaling). However, they have not been exploited for
                 enhanced schedulability of multiple applications, as we
                 do in this paper.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "mobile devices; multimedia systems; schedulability
                 analysis",
}

@Article{Egger:2008:DSM,
  author =       "Bernhard Egger and Jaejin Lee and Heonshik Shin",
  title =        "Dynamic scratchpad memory management for code in
                 portable systems with an {MMU}",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331335",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this work, we present a dynamic memory allocation
                 technique for a novel, horizontally partitioned memory
                 subsystem targeting contemporary embedded processors
                 with a memory management unit (MMU). We propose to
                 replace the on-chip instruction cache with a scratchpad
                 memory (SPM) and a small minicache. Serializing the
                 address translation with the actual memory access
                 enables the memory system to access either only the SPM
                 or the minicache. Independent of the SPM size and based
                 solely on profiling information, a postpass optimizer
                 classifies the code of an application binary into a
                 pageable and a cacheable code region. The latter is
                 placed at a fixed location in the external memory and
                 cached by the minicache. The former, the pageable code
                 region, is copied on demand to the SPM before
                 execution. Both the pageable code region and the SPM
                 are logically divided into pages the size of an MMU
                 memory page. Using the MMU's pagefault exception
                 mechanism, a runtime scratchpad memory manager (SPMM)
                 tracks page accesses and copies frequently executed
                 code pages to the SPM before they get executed. In
                 order to minimize the number of page transfers from the
                 external memory to the SPM, good code placement
                 techniques become more important with increasing sizes
                 of the MMU pages. We discuss code-grouping techniques
                 and provide an analysis of the effect of the MMU's page
                 size on execution time, energy consumption, and
                 external memory accesses. We show that by using the
                 data cache as a victim buffer for the SPM, significant
                 energy savings are possible. We evaluate our SPM
                 allocation strategy with fifteen applications,
                 including H.264, MP3, MPEG-4, and PGP. The proposed
                 memory system requires 8\% less die area compared to a
                 fully-cached configuration. On average, we achieve a
                 31\% improvement in runtime performance and a 35\%
                 reduction in energy consumption with an MMU page size
                 of 256 bytes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "code placement; compilers; heterogeneous memory;
                 paging; portable systems; postpass optimization;
                 scratchpad; victim cache; virtual memory",
}

@Article{Scholz:2008:MPB,
  author =       "Bernhard Scholz and Bernd Burgstaller and Jingling
                 Xue",
  title =        "Minimal placement of bank selection instructions for
                 partitioned memory architectures",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "12:1--12:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331336",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We have devised an algorithm for minimal placement of
                 bank selections in partitioned memory architectures.
                 This algorithm is parameterizable for a chosen metric,
                 such as speed, space, or energy. Bank switching is a
                 technique that increases the code and data memory in
                 microcontrollers without extending the address buses.
                 Given a program in which variables have been assigned
                 to data banks, we present a novel optimization
                 technique that minimizes the overhead of bank switching
                 through cost-effective placement of bank selection
                 instructions. The placement is controlled by a number
                 of different objectives, such as runtime, low power,
                 small code size or a combination of these parameters.
                 We have formulated the minimal placement of bank
                 selection instructions as a discrete optimization
                 problem that is mapped to a partitioned Boolean
                 quadratic programming (PBQP) problem. We implemented
                 the optimization as part of a PIC Microchip backend and
                 evaluated the approach for several optimization
                 objectives. Our benchmark suite comprises programs from
                 MiBench and DSPStone plus a microcontroller real-time
                 kernel and drivers for microcontroller hardware
                 devices. Our optimization achieved a reduction in
                 program memory space of between 2.7 and 18.2\%, and an
                 overall improvement with respect to instruction cycles
                 between 5.0 and 28.8\%. Our optimization achieved the
                 minimal solution for all benchmark programs. We
                 investigated the scalability of our approach toward the
                 requirements of future generations of microcontrollers.
                 This study was conducted as a worst-case analysis on
                 the entire MiBench suite. Our results show that our
                 optimization (1) scales well to larger numbers of
                 memory banks, (2) scales well to the larger problem
                 sizes that will become feasible with future
                 microcontrollers, and (3) achieves minimal placement
                 for more than 72\% of all functions from MiBench.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "bank selection; partitioned Boolean quadratic
                 programming; partitioned memory architectures",
}

@Article{Choi:2008:SHM,
  author =       "Yoonseo Choi and Hwansoo Han",
  title =        "Shared heap management for memory-limited {Java}
                 virtual machines",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "13:1--13:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331337",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "One scarce resource in embedded systems is memory.
                 Multitasking makes the lack of memory problem even
                 worse. Most current embedded systems, which do not
                 provide virtual memory, simply divide physical memory
                 and evenly assign contiguous memory chunks to multiple
                 applications. Such simple memory management can
                 frequently cause the lack of available memory for some
                 applications, while others are not using the full
                 amount of assigned memory. To overcome inefficiency in
                 current memory management, we present an efficient heap
                 management scheme that allows multiple applications to
                 share heap space. To reduce overall heap memory usage,
                 applications adaptively acquire subheaps out of shared
                 pool of memory and release surplus subheaps to shared
                 pool. As a result, applications see noncontiguous
                 multiple subheaps as a heap in their address space. We
                 target Java applications to implement our heap-sharing
                 scheme in the KVM from Sun Microsystems. To protect
                 fragmented heap space with a limited number of regions
                 in memory protection unit (MPU), we maintain only a
                 limited number of subheaps. We experimentally evaluate
                 our heap management scheme with J2ME MIDP applications.
                 Our static and dynamic schemes reduce heap memory
                 usage, on average, by 30 and 27\%, respectively. For
                 both schemes, overheads are kept low. The execution
                 times in our schemes are increased only by 0.01\% for
                 static scheme and 0.35\% for dynamic scheme, on
                 average.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "dynamic memory management; garbage collection; heap
                 sharing; memory protection unit",
}

@Article{So:2008:UHS,
  author =       "Hayden Kwok-Hay So and Robert Brodersen",
  title =        "A unified hardware\slash software runtime environment
                 for {FPGA}-based reconfigurable computers using
                 {BORPH}",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "14:1--14:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331338",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper explores the design and implementation of
                 BORPH, an operating system designed for FPGA-based
                 reconfigurable computers. Hardware designs execute as
                 normal UNIX processes under BORPH, having access to
                 standard OS services, such as file system support.
                 Hardware and software components of user designs may,
                 therefore, run as communicating processes within
                 BORPH's runtime environment. The familiar language
                 independent UNIX kernel interface facilitates easy
                 design reuse and rapid application development. To
                 develop hardware designs, a Simulink-based design flow
                 that integrates with BORPH is employed. Performances of
                 BORPH on two on-chip systems implemented on a BEE2
                 platform are compared.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "BORPH; FPGA; hardware process; reconfigurable
                 computers",
}

@Article{Caspi:2008:SPM,
  author = {Paul Caspi and Norman Scaife and Christos Sofronis and
    Stavros Tripakis},
  title = {Semantics-preserving multitask implementation of
    synchronous programs},
  journal = j-TECS,
  volume = {7},
  number = {2},
  pages = {15:1--15:??},
  month = feb,
  year = {2008},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1331331.1331339},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We study the implementation of a synchronous program
    as a set of multiple tasks running on the same
    computer, and scheduled by a real-time operating system
    using some preemptive scheduling policy, such as fixed
    priority or earliest-deadline first. Multitask
    implementations are necessary, for instance, in
    multiperiodic applications, when the worst-case
    execution time of the program is larger than its
    smallest period. In this case, a single-task
    implementation violates the schedulability assumption
    and, therefore, the synchrony hypothesis does not hold.
    We are aiming at semantics-preserving implementations,
    where, for a given input sequence, the output sequence
    produced by the implementation is the same as that
    produced by the original synchronous program, and this
    under all possible executions of the implementation.
    Straightforward implementation techniques are not
    semantics-preserving. We present an intertask
    communication protocol, called DBP, that is
    semantics-preserving and memory-optimal. DBP guarantees
    semantical preservation under all possible triggering
    patterns of the synchronous program: thus, it is
    applicable not only to time-, but also event-triggered
    applications. DBP works under both fixed priority and
    earliest-deadline first scheduling. DBP is a
    nonblocking protocol based on the use of intermediate
    buffers and manipulations of write-to/read-from
    pointers to these buffers: these manipulations happen
    upon arrivals, rather than executions of tasks, which
    is a distinguishing feature of DBP. DBP is
    memory-optimal in the sense that it uses as few buffers
    as needed, for any given triggering pattern. In the
    worst case, DBP requires, at most, $ N + 2 $ buffers
    for each writer, where $N$ is the number of readers for
    this writer.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {15},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {embedded software; model-based design; optimality;
    preemptive scheduling; process communication;
    semantical preservation; synchronous programming},
}

@Article{Liu:2008:HPP,
  author = {Duo Liu and Zheng Chen and Bei Hua and Nenghai Yu and
    Xinan Tang},
  title = {High-performance packet classification algorithm for
    multithreaded {IXP} network processor},
  journal = j-TECS,
  volume = {7},
  number = {2},
  pages = {16:1--16:??},
  month = feb,
  year = {2008},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1331331.1331340},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Packet classification is crucial for the Internet to
    provide more value-added services and guaranteed
    quality of service. Besides hardware-based solutions,
    many software-based classification algorithms have been
    proposed. However, classifying at 10 Gbps speed or
    higher is a challenging problem and it is still one of
    the performance bottlenecks in core routers. In
    general, classification algorithms face the same
    challenge of balancing between high classification
    speed and low memory requirements. This paper proposes
    a modified recursive flow classification (RFC)
    algorithm, Bitmap-RFC, which significantly reduces the
    memory requirements of RFC by applying a bitmap
    compression technique. To speed up classifying speed,
    we exploit the multithreaded architectural features in
    various algorithm development stages from algorithm
    design to algorithm implementation. As a result,
    Bitmap-RFC strikes a good balance between speed and
    space. It can significantly keep both high
    classification speed and reduce memory space
    consumption. This paper investigates the main NPU
    software design aspects that have dramatic performance
    impacts on any NPU-based implementations: memory space
    reduction, instruction selection, data allocation, task
    partitioning, and latency hiding. We experiment with an
    architecture-aware design principle to guarantee the
    high performance of the classification algorithm on an
    NPU implementation. The experimental results show that
    the Bitmap-RFC algorithm achieves 10 Gbps speed or
    higher and has a good scalability on Intel IXP2800
    NPU.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {16},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {architecture; embedded system design; multithreading;
    network processor; packet classification; thread-level
    parallelism},
}

@Article{Zhuo:2008:EED,
  author =       "Jianli Zhuo and Chaitali Chakrabarti",
  title =        "Energy-efficient dynamic task scheduling algorithms
                 for {DVS} systems",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "17:1--17:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331341",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dynamic voltage scaling (DVS) is a well-known
                 low-power design technique that reduces the processor
                 energy by slowing down the DVS processor and stretching
                 the task execution time. However, in a DVS system
                 consisting of a DVS processor and multiple devices,
                 slowing down the processor increases the device energy
                 consumption and thereby the system-level energy
                 consumption. In this paper, we first use system-level
                 energy consideration to derive the ``optimal'' scaling
                 factor by which a task should be scaled if there are no
                 deadline constraints. Next, we develop dynamic
                 task-scheduling algorithms that make use of dynamic
                 processor utilization and optimal scaling factor to
                 determine the speed setting of a task. We present
                 algorithm duEDF, which reduces the CPU energy
                 consumption and algorithm duSYS and its reduced
                 preemption version, duSYS\_PC, which reduce the
                 system-level energy. Experimental results on the
                 video-phone task set show that when the CPU power is
                 dominant, algorithm duEDF results in up to 45\% energy
                 savings compared to the non-DVS case. When the CPU
                 power and device power are comparable, algorithms duSYS
                 and duSYS\_PC achieve up to 25\% energy saving compared
                 to CPU energy-efficient algorithm duEDF, and up to 12\%
                 energy saving over the non-DVS scheduling algorithm.
                 However, if the device power is large compared to the
                 CPU power, then we show that a DVS scheme does not
                 result in lowest energy. Finally, a comparison of the
                 performance of algorithms duSYS and duSYS\_PC show that
                 preemption control has minimal effect on system-level
                 energy reduction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "DVS system; dynamic task scheduling; energy
                 minimization; optimal scaling factor; real time",
}

@Article{Lee:2008:DFR,
  author = {Sheayun Lee and Insik Shin and Woonseok Kim and Insup
    Lee and Sang Lyul Min},
  title = {A design framework for real-time embedded systems with
    code size and energy constraints},
  journal = j-TECS,
  volume = {7},
  number = {2},
  pages = {18:1--18:??},
  month = feb,
  year = {2008},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1331331.1331342},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Real-time embedded systems are typically constrained
    in terms of three system performance criteria: space,
    time, and energy. The performance requirements are
    directly translated into constraints imposed on the
    system's resources, such as code size, execution time,
    and energy consumption. These resource constraints
    often interact or even conflict with each other in a
    complex manner, making it difficult for a system
    developer to apply a well-defined design methodology in
    developing a real-time embedded system. Motivated by
    this observation, we propose a design framework that
    can flexibly balance the tradeoff involving the
    system's code size, execution time, and energy
    consumption. Given a system specification and an
    optimization criteria, the proposed technique generates
    a set of design parameters in such a way that a system
    cost function is minimized while the given resource
    constraints are satisfied. Specifically, the technique
    derives code generation decision for each task so that
    a specific version of code is selected among a number
    of different ones that have distinct characteristics in
    terms of code size and execution time. In addition, the
    design framework determines the voltage/frequency
    setting for a variable voltage processor whose supply
    voltage can be adjusted at runtime in order to minimize
    the energy consumption while execution performance is
    degraded accordingly. The proposed technique formulates
    this design process as a constrained optimization
    problem. We show that this optimization problem is
    NP-hard and then provide a heuristic solution to it. We
    show that these seemingly conflicting design goals can
    be pursued by using a simple optimization algorithm
    that works with a single optimization criteria.
    Moreover, the optimization is driven by an abstract
    system specification given by the system developer, so
    that the system development process can be automated.
    The results from our simulation show that the proposed
    algorithm finds a solution that is close to the optimal
    one with the average error smaller than 1.0\%.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {18},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {code size; embedded; energy; real-time; scheduling},
}

@Article{Manolache:2008:TMP,
  author = {Sorin Manolache and Petru Eles and Zebo Peng},
  title = {Task mapping and priority assignment for soft
    real-time applications under deadline miss ratio
    constraints},
  journal = j-TECS,
  volume = {7},
  number = {2},
  pages = {19:1--19:??},
  month = feb,
  year = {2008},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1331331.1331343},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Both analysis and design optimisation of real-time
    systems has predominantly concentrated on considering
    hard real-time constraints. For a large class of
    applications, however, this is both unrealistic and
    leads to unnecessarily expensive implementations. This
    paper addresses the problem of task priority assignment
    and task mapping in the context of multiprocessor
    applications with stochastic execution times and in the
    presence of constraints on the percentage of missed
    deadlines. We propose a design space exploration
    strategy together with a fast method for system
    performance analysis. Experiments emphasize the
    efficiency of the proposed analysis method and
    optimisation heuristic in generating high-quality
    implementations of soft real-time systems with
    stochastic task execution times and constraints on
    deadline miss ratios.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {19},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {mapping; priority assignment; schedulability analysis;
    soft real-time systems; stochastic task execution
    times},
}

@Article{Park:2008:SRB,
  author = {Taejoon Park and Kang G. Shin},
  title = {Secure routing based on distributed key sharing in
    large-scale sensor networks},
  journal = j-TECS,
  volume = {7},
  number = {2},
  pages = {20:1--20:??},
  month = feb,
  year = {2008},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1331331.1331344},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Sensor networks, usually built with a large number of
    small, low-cost sensor nodes, are characterized by
    their large-scale and unattended deployment,
    necessitating ``secure'' communications between nearby,
    as well as remote, sensor nodes for their intended
    applications and services. Key setup/sharing is crucial
    to the protection of such applications/services from
    attacks, but existing (public-key, cluster-based, or
    pairwise) solutions become too expensive (hence,
    inefficient) when the underlying applications/services
    require communications between distant sensor nodes. To
    remedy this inefficiency, we propose a novel
    distributed key-sharing scheme, in which each
    participating sensor node shares unique keys with a
    small number of other sensor nodes---called distributed
    key servers (DKSs)---chosen according to their
    geographic distance and communication direction. Using
    DKSs, we develop two secure routing protocols: (1)
    secure geographic forwarding that delivers packets by
    using a chain of DKS lookups, each secured with its own
    key and forwarded geographically; and (2) key
    establishment that creates a secure session between two
    distant sensor nodes based solely on symmetric-ciphers.
    These protocols enable low-cost, low-power sensors to
    provide high-level security at a very low cost.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {20},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {attack tolerance; distributed key sharing and servers;
    key establishment; large-scale sensor networks; secure
    geographic forwarding},
}

@Article{Cho:2008:DNP,
  author =       "Young H. Cho and William H. Mangione-Smith",
  title =        "Deep network packet filter design for reconfigurable
                 devices",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "21:1--21:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331345",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most network routers and switches provide some
                 protection against the network attacks. However, the
                 rapidly increasing amount of damages reported over the
                 past few years indicates the urgent need for tougher
                 security. Deep-packet inspection is one of the
                 solutions to capture packets that can not be identified
                 using the traditional methods. It uses a list of
                 signatures to scan the entire content of the packet,
                 providing the means to filter harmful packets out of
                 the network. Since one signature does not depend on the
                 other, the filtering process has a high degree of
                 parallelism. Most software and hardware deep-packet
                 filters that are in use today execute the tasks under
                 Von Neumann architecture. Such architecture can not
                 fully take advantage of the parallelism. For instance,
                 one of the most widely used network intrusion-detection
                 systems, Snort, configured with 845 patterns, running
                 on a dual 1-GHz Pentium III system, can sustain a
                 throughput of only 50 Mbps. The poor performance is
                 because of the fact that the processor is programmed to
                 execute several tasks sequentially instead of
                 simultaneously. We designed scalable deep-packet
                 filters on field-programmable gate arrays (FPGAs) to
                 search for all data-independent patterns
                 simultaneously. With FPGAs, we have the ability to
                 reprogram the filter when there are any changes to the
                 signature set. The smallest full-pattern matcher
                 implementation for the latest Snort NIDS fits in a
                 single 400k Xilinx FPGA (Spartan 3-XC3S400) with a
                 sustained throughput of 1.6 Gbps. Given a larger FPGA,
                 the design can scale linearly to support a greater
                 number of patterns, as well as higher data
                 throughput.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "firewall; network intrusion detection; string filter;
                 virus; worm",
}

@Article{Pasricha:2008:FEB,
  author =       "Sudeep Pasricha and Nikil Dutt and Mohamed
                 Ben-Romdhane",
  title =        "Fast exploration of bus-based communication
                 architectures at the {CCATB} abstraction",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "22:1--22:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1331331.1331346",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Currently, system-on-chip (SoC) designs are becoming
                 increasingly complex, with more and more components
                 being integrated into a single SoC design.
                 Communication between these components is increasingly
                 dominating critical system paths and frequently becomes
                 the source of performance bottlenecks. It, therefore,
                 becomes imperative for designers to explore the
                 communication space early in the design flow.
                 Traditionally, system designers have used Pin-Accurate
                 Bus Cycle Accurate (PA-BCA) models for early
                 communication space exploration. These models capture
                 all of the bus signals and strictly maintain cycle
                 accuracy, which is useful for reliable performance
                 exploration but results in slow simulation speeds for
                 complex designs, even when they are modeled using
                 high-level languages. Recently, there have been several
                 efforts to use the Transaction-Level Modeling (TLM)
                 paradigm for improving simulation performance in BCA
                 models. However, these transaction-based BCA (T-BCA)
                 models capture a lot of details that can be eliminated
                 when exploring communication architectures. In this
                 paper, we extend the TLM approach and propose a new
                 transaction-based modeling abstraction level (CCATB) to
                 explore the communication design space. Our abstraction
                 level bridges the gap between the TLM and BCA levels,
                 and yields an average performance speedup of 120\% over
                 PA-BCA and 67\% over T-BCA models, on average. The
                 CCATB models are not only faster to simulate, but also
                 extremely accurate and take less time to model compared
                 to both T-BCA and PA-BCA models. We describe the
                 mechanisms that produce the speedup in CCATB models and
                 also analyze how the achieved simulation speedup scales
                 with design complexity. To demonstrate the
                 effectiveness of using CCATB for exploration, we
                 present communication space exploration case studies
                 from the broadband communication and multimedia
                 application domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "communication architecture; on-chip bus; performance
                 exploration; system-on-chip; transaction-level
                 modeling",
}

@Article{DiNatale:2008:BOM,
  author = {Marco {Di Natale} and Valerio Pappalardo},
  title = {Buffer optimization in multitask implementations of
    {Simulink} models},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {23:1--23:??},
  month = apr,
  year = {2008},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1347375.1347376},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Automatic generation of a controller implementation
    from a synchronous reactive model is among the best
    practices for software development in the automotive
    and aeronautics industry, because of the possibility of
    simulation, model checking, and error-free
    implementation. This paper discusses an algorithm for
    optimizing the single-processor multitask
    implementation of Simulink models with real-time
    execution constraints, derived from the sampling rates
    of the functional blocks. Existing code generation
    tools enforce the addition of extra buffering and
    latencies whenever there is a rate transition among
    functional blocks. This work shows how timing analysis
    can be used to find the cases in which additional
    buffering and latency can be avoided, improving the
    space and time performance of the application. The
    proposed search algorithm allows finding a solution
    with reduced and possibly minimal use of buffering even
    for very high values of processor utilization.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {23},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {code generation; real-time programming;
    schedulability; software models},
}

@Article{Trajkovic:2008:ISA,
  author = {Jelena Trajkovic and Alexander V. Veidenbaum and Arun
    Kejariwal},
  title = {Improving {SDRAM} access energy efficiency for
    low-power embedded systems},
  journal = j-TECS,
  volume = {7},
  number = {3},
  pages = {24:1--24:??},
  month = apr,
  year = {2008},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1347375.1347377},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {DRAM (dynamic random-access memory) energy consumption
    in low-power embedded systems can be very high,
    exceeding that of the data cache or even that of the
    processor. This paper presents and evaluates a scheme
    for reducing the energy consumption of SDRAM
    (synchronous DRAM) memory access by a combination of
    techniques that take advantage of SDRAM energy
    efficiencies in bank and row access. This is achieved
    by using small, cachelike structures in the memory
    controller to prefetch an additional cache block(s) on
    SDRAM reads and to combine block writes to the same
    SDRAM row. The results quantify the SDRAM energy
    consumption of MiBench applications and demonstrate
    significant savings in SDRAM energy consumption, 23\%,
    on average, and reduction in the energy-delay product,
    44\%, on average. The approach also improves
    performance: the CPI is reduced by 26\%, on average.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {24},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {embedded processors and low power; fetch buffer;
    SDRAM; write-combining buffer},
}

@Article{Varma:2008:AFS,
  author =       "Ankush Varma and Eric Debes and Igor Kozintsev and
                 Paul Klein and Bruce Jacob",
  title =        "Accurate and fast system-level power modeling: an
                 {XScale}-based case study",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "25:1--25:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1347375.1347378",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Accurate and fast system modeling is central to the
                 rapid design space exploration needed for
                 embedded-system design. With fast, complex SoCs playing
                 a central role in such systems, system designers have
                 come to require MIPS-range simulation speeds and
                 near-cycle accuracy. The sophisticated simulation
                 frameworks that have been developed for high-speed
                 system performance modeling do not address power
                 consumption, although it is a key design constraint. In
                 this paper, we define a simulation-based methodology
                 for extending system performance modeling frameworks to
                 also include power modeling. We demonstrate the use of
                 this methodology with a case study of a real, complex
                 embedded system, comprising the Intel XScale embedded
                 microprocessor, its WMMX SIMD co-processor, L1 caches,
                 SDRAM, and the on-board address and data buses. We
                 describe detailed power models for each of these
                 components and validate them against physical
                 measurements from hardware, demonstrating that such
                 frameworks enable designers to model both power and
                 performance at high speeds without sacrificing
                 accuracy. Our results indicate that the power estimates
                 obtained are accurate within 5\% of physical
                 measurements from hardware, while simulation speeds
                 consistently exceed a million instructions per second
                 (MIPS).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "embedded systems; power modeling; SystemC",
}

@Article{Aamodt:2008:CTI,
  author =       "Tor M. Aamodt and Paul Chow",
  title =        "Compile-time and instruction-set methods for improving
                 floating- to fixed-point conversion accuracy",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "26:1--26:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1347375.1347379",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper proposes and evaluates compile time and
                 instruction-set techniques for improving the accuracy
                 of signal-processing algorithms run on fixed-point
                 embedded processors. These techniques are proposed in
                 the context of a profile guided floating- to
                 fixed-point compiler-based conversion process. A novel
                 fixed-point scaling algorithm (IRP) is introduced that
                 exploits correlations between values in a program by
                 applying fixed-point scaling, retaining as much
                 precision as possible without causing overflow. This
                 approach is extended into a more aggressive scaling
                 algorithm (IRP-SA) by leveraging the modulo nature of
                 2's complement addition and subtraction to discard most
                 significant bits that may not be redundant
                 sign-extension bits. A complementary scaling technique
                 (IDS) is then proposed that enables the fixed-point
                 scaling of a variable to be parameterized, depending
                 upon the context of its definitions and uses. Finally,
                 a novel instruction-set enhancement---fractional
                 multiplication with internal left shift (FMLS)---is
                 proposed to further leverage interoperand correlations
                 uncovered by the IRP-SA scaling algorithm. FMLS
                 preserves a different subset of the full product's bits
                 than traditional fractional fixed-point or integer
                 multiplication. On average, FMLS combined with IRP-SA
                 improves accuracy on processors with uniform bitwidth
                 register architectures by the equivalent of 0.61 bits
                 of additional precision for a set of signal-processing
                 benchmarks (up to 2 bits). Even without employing FMLS,
                 the IRP-SA scaling algorithm achieves additional
                 accuracy over two previous fixed-point scaling
                 algorithms by averages of 1.71 and 0.49 bits.
                 Furthermore, as FMLS combines multiplication with a
                 scaling shift, it reduces execution time by an average
                 of 9.8\%. An implementation of IDS, specialized to
                 single-nested loops, is found to improve accuracy of a
                 lattice filter benchmark by the equivalent of more than
                 16-bits of precision.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "compilation; digital signal processing; fixed-point;
                 fractional multiplication; scaling; signal-to-noise
                 ratio",
}

@Article{Fei:2008:EAF,
  author          = {Yunsi Fei and Lin Zhong and Niraj K. Jha},
  title           = {An energy-aware framework for dynamic software
                     management in mobile computing systems},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {27:1--27:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1347375.1347380},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Energy efficiency has become a very important and
                     challenging issue for resource-constrained mobile
                     computers. In this article, we propose a novel dynamic
                     software management (DSOM) framework to improve battery
                     utilization. We have designed and implemented a DSOM
                     module in user space, independent of the operating
                     system (OS), which explores quality-of-service (QoS)
                     adaptation to reduce system energy and employs a
                     priority-based preemption policy for multiple
                     applications to avoid competition for limited energy
                     resources. Software energy macromodels for mobile
                     applications are employed to predict energy demand at
                     each QoS level, so that the DSOM module is able to
                     select the best possible trade-off between energy
                     conservation and application QoS; it also honors the
                     priority desired by the user. Our experimental results
                     for some mobile applications (video player, speech
                     recognizer, voice-over-IP) show that this approach can
                     meet user-specified task-oriented goals and
                     significantly improve battery utilization.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {27},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {energy macromodel; runtime coordination; software
                     adaptation},
}

@Article{Zhong:2008:SWE,
  author          = {Xiliang Zhong and Cheng-Zhong Xu},
  title           = {System-wide energy minimization for real-time tasks:
                     {Lower} bound and approximation},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {28:1--28:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1347375.1347381},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We present a dynamic voltage scaling (DVS) technique
                     that minimizes system-wide energy consumption for both
                     periodic and sporadic tasks. It is known that a system
                     consists of processors and a number of other
                     components. Energy-aware processors can be run in
                     different speed levels; components like memory and I/O
                     subsystems and network interface cards can be in a
                     standby state when they are active, but idle. Processor
                     energy optimization solutions are not necessarily
                     efficient from the perspective of systems. Current
                     system-wide energy optimization studies are often
                     limited to periodic tasks with heuristics in getting
                     approximated solutions. In this paper, we develop an
                     exact dynamic programming algorithm for periodic tasks
                     on processors with practical discrete speed levels. The
                     algorithm determines the lower bound of energy
                     expenditure in pseudopolynomial time. An approximation
                     algorithm is proposed to provide performance guarantee
                     with a given bound in polynomial running time. Because
                     of their time efficiency, both the optimization and
                     approximation algorithms can be adapted for online
                     scheduling of sporadic tasks with irregular task
                     releases. We prove that system-wide energy optimization
                     for sporadic tasks is NP-hard in the strong sense. We
                     develop (pseudo-) polynomial-time solutions by
                     exploiting its inherent properties.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {28},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {dynamic power management; dynamic voltage scaling;
                     power-aware scheduling; real-time systems},
}

@Article{Zhou:2008:CIA,
  author          = {Ye Zhou and Edward A. Lee},
  title           = {Causality interfaces for actor networks},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {29:1--29:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1347375.1347382},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We consider concurrent models of computation where
                     ``actors'' (components that are in charge of their own
                     actions) communicate by exchanging messages. The
                     interfaces of actors principally consist of ``ports,''
                     which mediate the exchange of messages. Actor-oriented
                     architectures contrast with and complement
                     object-oriented models by emphasizing the exchange of
                     data between concurrent components rather than
                     transformation of state. Examples of such models of
                     computation include the classical actor model,
                     synchronous languages, data-flow models, process
                     networks, and discrete-event models. Many experimental
                     and production languages used to design embedded
                     systems are actor oriented and based on one of these
                     models of computation. Many of these models of
                     computation benefit considerably from having access to
                     causality information about the components. This paper
                     augments the interfaces of such components to include
                     such causality information. It shows how this causality
                     information can be algebraically composed so that
                     compositions of components acquire causality interfaces
                     that are inferred from their components and the
                     interconnections. We illustrate the use of these
                     causality interfaces to statically analyze timed models
                     and synchronous language compositions for causality
                     loops and data-flow models for deadlock. We also show
                     that causality analysis for each communication cycle
                     can be performed independently and in parallel, and it
                     is only necessary to analyze one port for each cycle.
                     Finally, we give a conservative approximation technique
                     for handling dynamically changing causality
                     properties.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {29},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {actors; behavioral types; causality; data flow;
                     deadlock; discrete-event models; interfaces;
                     synchronous languages; timed systems},
}

@Article{Shin:2008:CRT,
  author          = {Insik Shin and Insup Lee},
  title           = {Compositional real-time scheduling framework with
                     periodic model},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {30:1--30:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1347375.1347383},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {It is desirable to develop large complex systems using
                     components based on systematic abstraction and
                     composition. Our goal is to develop a compositional
                     real-time scheduling framework to support abstraction
                     and composition techniques for real-time aspects of
                     components. In this paper, we present a formal
                     description of compositional real-time scheduling
                     problems, which are the component abstraction and
                     composition problems. We identify issues that need be
                     addressed by solutions and provide our framework for
                     the solutions, which is based on the periodic
                     interface. Specifically, we introduce the periodic
                     resource model to characterize resource allocations
                     provided to a single component. We present exact
                     schedulability conditions for the standard Liu and
                     Layland periodic task model and the proposed periodic
                     resource model under EDF and RM scheduling, and we show
                     that the component abstraction and composition problems
                     can be addressed with periodic interfaces through the
                     exact schedulability conditions. We also provide the
                     utilization bounds of a periodic task set over the
                     periodic resource model and the abstraction bounds of
                     periodic interfaces for a periodic task set under EDF
                     and RM scheduling. We finally present the analytical
                     bounds of overheads that our solution incurs in terms
                     of resource utilization increase and evaluate the
                     overheads through simulations.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {30},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {abstract; component; composition; hierarchical;
                     interface; real-time; scheduling},
}

@Article{Voyiatzis:2008:SFS,
  author          = {Artemios G. Voyiatzis and Dimitrios N. Serpanos},
  title           = {The security of the {Fiat--Shamir} scheme in the
                     presence of transient hardware faults},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {31:1--31:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1347375.1347384},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Implementation cryptanalysis has emerged as a
                     realistic threat for cryptographic systems. It consists
                     of two classes of attacks: fault-injection and
                     side-channel attacks. In this work, we examine the
                     resistance of the Fiat--Shamir scheme to
                     fault-injection attacks, since Fiat--Shamir is a
                     popular scheme for ``light'' consumer devices, such as
                     smartcards, in a wide range of consumer services. We
                     prove that an existing attack, known as the Bellcore
                     attack, is incomplete. We propose an extension to the
                     protocol that proactively secures Fiat--Shamir systems
                     from the Bellcore attack and we prove its strength.
                     Finally, we introduce a new attack model, which, under
                     stronger assumptions, can derive the secret keys from
                     both the original Fiat--Shamir scheme as well as its
                     proposed extension. Our approach demonstrates that
                     countermeasures for implementation cryptanalysis must
                     be carefully designed and that deployed systems must
                     include appropriate protection mechanisms for all known
                     attacks and be flexible enough to incorporate
                     countermeasures for new ones.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {31},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {Bellcore attack; cryptography; Fiat--Shamir
                     identification scheme; side-channel attacks;
                     smartcards},
}

@Article{Gurun:2008:NGP,
  author          = {Selim Gurun and Chandra Krintz and Rich Wolski},
  title           = {{NWSLite}: a general-purpose, nonparametric prediction
                     utility for embedded systems},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {32:1--32:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1347375.1347385},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Time series-based prediction methods have a wide range
                     of uses in embedded systems. Many OS algorithms and
                     applications require accurate prediction of demand and
                     supply of resources. However, configuring prediction
                     algorithms is not easy, since the dynamics of the
                     underlying data requires continuous observation of the
                     prediction error and dynamic adaptation of the
                     parameters to achieve high accuracy. Current prediction
                     methods are either too costly to implement on
                     resource-constrained devices or their parameterization
                     is static, making them inappropriate and inaccurate for
                     a wide range of datasets. This paper presents NWSLite,
                     a prediction utility that addresses these shortcomings
                     on resource-restricted platforms.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {32},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {computation offloading; CPU availability estimation;
                     embedded systems; network performance estimation;
                     prediction algorithms},
}

@Article{Yan:2008:DOD,
  author =       "Ting Yan and Yu Gu and Tian He and John A. Stankovic",
  title =        "Design and optimization of distributed sensing
                 coverage in wireless sensor networks",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1347375.1347386",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "For many sensor network applications, such as military
                 surveillance, it is necessary to provide full sensing
                 coverage to a security-sensitive area while, at the
                 same time, minimizing energy consumption and extending
                 system lifetime by leveraging the redundant deployment
                 of sensor nodes. In this paper, we propose a
                 surveillance service for sensor networks based on a
                 distributed energy-efficient sensing coverage protocol.
                 In the protocol, each node is able to dynamically
                 decide a schedule for itself to guarantee a certain
                 degree-of-coverage (DOC) with average energy
                 consumption inversely proportional to the node density.
                 Several optimizations and extensions are proposed to
                 enhance the basic design with a better load-balance
                 feature and a longer network lifetime. We consider and
                 address the impact of the target size and the
                 unbalanced initial energy capacity of individual nodes
                 to the network lifetime. Several practical issues such
                 as the localization error, irregular sensing range, and
                 unreliable communication links are addressed as well.
                 Simulation shows that our protocol extends system
                 lifetime significantly with low energy consumption. It
                 outperforms other state-of-the-art schemes by as much
                 as 50\% reduction in energy consumption and as much as
                 130\% increase in the half-life of the network.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "energy conservation; sensing coverage; sensor
                 networks",
}

@Article{Ozer:2008:SBE,
  author          = {Emre {\"O}zer and Andy P. Nisbet and David Gregg},
  title           = {A stochastic bitwidth estimation technique for compact
                     and low-power custom processors},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {34:1--34:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1347375.1347387},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {There is an increasing trend toward compiling from C
                     to custom hardware for designing embedded systems in
                     which the area and power consumption of
                     application-specific functional units, registers, and
                     memory blocks are heavily dependent on the bit-widths
                     of integer operands used in computations. The actual
                     bit-width required to store the values assigned to an
                     integer variable during the execution of a program will
                     not, in general, match the built-in C data types. Thus,
                     precious area is wasted if the built-in data type sizes
                     are used to declare the size of integer operands. In
                     this paper, we introduce stochastic bit-width
                     estimation that follows a simulation-based
                     probabilistic approach to estimate the bit-widths of
                     integer variables using extreme value theory. The
                     estimation technique is also empirically compared to
                     two compile-time integer bit-width analysis techniques.
                     Our experimental results show that the stochastic
                     bit-width estimation technique dramatically reduces
                     integer bit-widths and, therefore, enables more compact
                     and power-efficient custom hardware designs than the
                     compile-time integer bit-width analysis techniques. Up
                     to 37\% reduction in custom hardware area and 30\%
                     reduction in logic power consumption using stochastic
                     bit-width estimation can be attained over ten integer
                     applications implemented on an FPGA chip.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {34},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {bit-width analysis; custom hardware; extreme value
                     theory; FPGA; statistical estimation},
}

@Article{Kumar:2008:CCP,
  author          = {Rajeev Kumar and Dipankar Das},
  title           = {Code compression for performance enhancement of
                     variable-length embedded processors},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {35:1--35:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1347375.1347388},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Most of the work done in the field of code compression
                     pertains to processors with fixed-length instruction
                     encoding. The design of a code-compression scheme for
                     variable-length instruction encodings poses newer
                     design challenges. In this work, we first investigate
                     the scope for code compression on variable-length
                     instruction-set processors whose encodings are already
                     optimized to a certain extent with respect to their
                     usage. For such ISAs instruction boundaries are not
                     known prior to decoding. Another challenging task of
                     designing a code-compression scheme for such ISAs is
                     designing the decompression hardware, which must
                     decompress code postcache so that we gain in
                     performance. We present two dictionary-based code
                     compression schemes. The first algorithm uses a
                     bit-vector; the second one uses reserved instructions
                     to identify code words. We design additional logic for
                     each of the schemes to decompress the code on-the-fly.
                     We test the two algorithms with a variable-length RISC
                     processor. We provide a detailed experimental analysis
                     of the empirical results obtained by extensive
                     simulation-based design space exploration for this
                     system. The optimized decompressor can now execute
                     compressed program faster than the native program. The
                     experiments demonstrate reduction in code size (up to
                     30\%), speed-up (up to 15\%), and bus-switching
                     activity (up to 20\%). We also implement one
                     decompressor in a hardware description language and
                     synthesize it to illustrate the small overheads
                     associated with the proposed approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {35},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  keywords        = {bus switching; code compression; code decompression;
                     embedded systems; instruction memory; RISC processor;
                     variable-length ISAs},
}

@Article{Wilhelm:2008:WCE,
  author =       "Reinhard Wilhelm and Jakob Engblom and Andreas
                 Ermedahl and Niklas Holsti and Stephan Thesing and
                 David Whalley and Guillem Bernat and Christian
                 Ferdinand and Reinhold Heckmann and Tulika Mitra and
                 Frank Mueller and Isabelle Puaut and Peter Puschner and
                 Jan Staschulat and Per Stenstr{\"o}m",
  title =        "The worst-case execution-time problem---overview of
                 methods and survey of tools",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1347375.1347389",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The determination of upper bounds on execution times,
                 commonly called worst-case execution times (WCETs), is
                 a necessary step in the development and validation
                 process for hard real-time systems. This problem is
                 hard if the underlying processor architecture has
                 components, such as caches, pipelines, branch
                 prediction, and other speculative components. This
                 article describes different approaches to this problem
                 and surveys several commercially available tools and
                 research prototypes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "hard real time; worst-case execution times",
}

@Article{Hessell:2008:EES,
  author =       "Fabiano Hessell and Kenneth Kent and Dionisios
                 Pnevmatikatos",
  title =        "Editorial: {Embedded} systems---new challenges and
                 future directions",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "37:1--37:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1376804.1376805",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Park:2008:RFF,
  author =       {Park, Chanik and Cheon, Wonmoon and Kang, Jeonguk and
                 Roh, Kangho and Cho, Wonhee and Kim, Jin-Soo},
  title =        {A reconfigurable {FTL} (flash translation layer)
                 architecture for {NAND} flash-based applications},
  journal =      j-TECS,
  volume =       {7},
  number =       {4},
  pages =        {38:1--38:??},
  month =        jul,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1376804.1376806},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Aug 5 19:32:59 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {In this article, a novel FTL (flash translation layer)
                 architecture is proposed for NAND flash-based
                 applications such as MP3 players, DSCs (digital still
                 cameras) and SSDs (solid-state drives). Although the
                 basic function of an FTL is to translate a logical
                 sector address to a physical sector address in flash
                 memory, efficient algorithms of an FTL have a
                 significant impact on performance as well as the
                 lifetime. After the dominant parameters that affect the
                 performance and endurance are categorized, the design
                 space of the FTL architecture is explored based on a
                 diverse workload analysis. With the proposed FTL
                 architectural framework, it is possible to decide which
                 configuration of FTL mapping parameters yields the best
                 performance, depending on the differing characteristics
                 of various NAND flash-based applications.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {38},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {Flash memory; FTL; performance analysis;
                 reconfigurable architecture},
}

@Article{Popovici:2008:PBS,
  author =       {Popovici, Katalin and Guerin, Xavier and
                 Rousseau, Frederic and Paolucci, Pier Stanislao and
                 Jerraya, Ahmed Amine},
  title =        {Platform-based software design flow for heterogeneous
                 {MPSoC}},
  journal =      j-TECS,
  volume =       {7},
  number =       {4},
  pages =        {39:1--39:??},
  month =        jul,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1376804.1376807},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Aug 5 19:32:59 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Current multimedia applications demand complex
                 heterogeneous multiprocessor architectures with
                 specific communication infrastructure in order to
                 achieve the required performances. Programming these
                 architectures usually results in writing separate
                 low-level code for the different processors (DSP,
                 microcontroller), implying late global validation of
                 the overall application with the hardware platform. We
                 propose a platform-based software design flow able to
                 efficiently use the resources of the architecture and
                 allowing easy experimentation of several mappings of
                 the application onto the platform resources. We use a
                 high-level environment to capture both application and
                 architecture initial representations. An executable
                 software stack is generated automatically for each
                 processor from the initial model. The software
                 generation and validation is performed gradually
                 corresponding to different software abstraction levels.
                 Specific software development platforms (abstract
                 models of the architecture) are generated and used to
                 allow debugging of the different software components
                 with explicit hardware-software interaction. We applied
                 this approach on a multimedia platform, involving a
                 high performance DSP and a RISC processor, to explore
                 communication architecture and generate an efficient
                 executable code for a multimedia application. Based on
                 automatic tools, the proposed flow increases
                 productivity and preserves design quality.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {39},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {multimedia; Multiprocessor system-on chip; programming
                 environment; Simulink; software design; SystemC;
                 transaction level modeling},
}

@Article{Chattopadhyay:2008:PPA,
  author =       {Chattopadhyay, A. and Ishebabi, H. and Chen, X. and
                 Rakosi, Z. and Karuri, K. and Kammler, D. and
                 Leupers, R. and Ascheid, G. and Meyr, H.},
  title =        {Prefabrication and postfabrication architecture
                 exploration for partially reconfigurable {VLIW}
                 processors},
  journal =      j-TECS,
  volume =       {7},
  number =       {4},
  pages =        {40:1--40:??},
  month =        jul,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1376804.1376808},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Aug 5 19:32:59 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Modern application-specific instruction-set processors
                 (ASIPs) face the daunting task of delivering high
                 performance for a wide range of applications. For
                 enhancing the performance, architectural features, for
                 example, pipelining, VLIW, are often employed in ASIPs,
                 leading to high design complexity. Integrated ASIP
                 design environments, like template-based approaches and
                 language-driven approaches, provide an answer to this
                 growing design complexity. At the same time, increasing
                 hardware design costs have motivated the processor
                 designers to introduce high flexibility in the
                 processor. Flexibility, in its most effective form, can
                 be introduced to the ASIP by coupling a reconfigurable
                 unit to the base processor. Because of its obvious
                 benefits, several reconfigurable ASIPs (rASIPs) have
                 been designed for years. This design paradigm gained
                 momentum with the advent of coarse-grained FPGAs, where
                 the lack of domain-specific performance common in
                 general-purpose FPGAs are largely overcome by choosing
                 application-dependent basic functional units. These
                 rASIP designs lack a generic flow from high-level
                 specification, resulting in intuitive design decisions
                 and hard-to-retarget processor design tools. Although
                 partial, template-based approaches for rASIP design is
                 existent, a clear design methodology especially for the
                 prefabrication architecture exploration is not present.
                 In order to address this issue, a high-level
                 specification and design methodology for partially
                 reconfigurable VLIW processors is proposed in this
                 article. To show the benefit of this approach, a
                 commercial VLIW processor is used as the base
                 architecture and two domains of applications are
                 studied for potential performance gain.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {40},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {ASIP; coarse-grained FPGA; VLIW},
}

@Article{Lin:2008:MAC,
  author =       {Lin, Yi-Neng and Lin, Ying-Dar and Lai, Yuan-Cheng and
                 Tseng, Kuo-Kun},
  title =        {Modeling and analysis of core-centric network
                 processors},
  journal =      j-TECS,
  volume =       {7},
  number =       {4},
  pages =        {41:1--41:??},
  month =        jul,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1376804.1376809},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Aug 5 19:32:59 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Network processors can be categorized into two types,
                 the coprocessors-centric model in which data-plane is
                 handled by coprocessors, and the core-centric model in
                 which the core processes most of the data-plane packets
                 yet offloading some tasks to coprocessors. While the
                 former has been properly explored over various
                 applications, research regarding the latter remain
                 limited. Based on the previous experience of
                 prototyping the virtual private network (VPN) over the
                 IXP425 network processor, this work aims to derive
                 design implications for the core-centric model
                 performing computational intensive applications. From
                 system and IC vendors' perspectives, the
                 continuous-time Markov chain and Petri net simulations
                 are adopted to explore this architecture. Analytical
                 results prove to be quite inline with those of the
                 simulation and implementation. With subsequent
                 investigation, we find that appropriate process run
                 lengths can improve the effective core utilization by
                 2.26 times, and by offloading the throughput boosts 7.5
                 times. The results also suggest single-process
                 programming, since context-switch overhead impacts
                 considerably on the performance.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {41},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {core-centric; embedded system; modeling; Network
                 processor; simulation},
}

%%% NOTE: The citation key below keeps its historical ``Get'' prefix so
%%% that existing citations continue to resolve; the first author's
%%% surname is Hugues.
@Article{Get:2008:PFE,
  author =       "Jerome Hugues and Bechir Zalila and Laurent Pautet and
                 Fabrice Kordon",
  title =        "From the prototype to the final embedded system using
                 the {Ocarina AADL} tool suite",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "42:1--42:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1376804.1376810",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Building distributed real-time embedded systems
                 requires a stringent methodology, from early
                 requirement capture to full implementation. However,
                 there is a strong link between the requirements and the
                 final implementation (e.g., scheduling and resource
                 dimensioning). Therefore, a rapid prototyping process
                 based on automation of tedious and error-prone tasks
                 (analysis and code generation) is required to speed up
                 the development cycle. In this article, we show how the
                 AADL ({\em Architecture Analysis and Design
                 Language\/}), which appeared in late 2004, helps solve
                 these issues thanks to a dedicated tool suite. We then
                 detail the prototyping process and its current
                 implementation: Ocarina.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "AADL; distributed; DRE; embedded; Ocarina; PolyORB-HI;
                 real-time",
}

@Article{Benveniste:2008:CHR,
  author =       {Benveniste, Albert and Caillaud, Beno{\^\i}t and
                 Carloni, Luca P. and Caspi, Paul and
                 Sangiovanni-Vincentelli, Alberto L.},
  title =        {Composing heterogeneous reactive systems},
  journal =      j-TECS,
  volume =       {7},
  number =       {4},
  pages =        {43:1--43:??},
  month =        jul,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1376804.1376811},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Aug 5 19:32:59 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {We present a compositional theory of heterogeneous
                 reactive systems. The approach is based on the concept
                 of tags marking the events of the signals of a system.
                 Tags can be used for multiple purposes from indexing
                 evolution in time (time stamping) to expressing
                 relations among signals, like coordination (e.g.,
                 synchrony and asynchrony) and causal dependencies. The
                 theory provides flexibility in system modeling because
                 it can be used both as a unifying mathematical
                 framework to relate heterogeneous models of
                 computations and as a formal vehicle to implement
                 complex systems by combining heterogeneous components.
                 In particular, we introduce an algebra of tag
                 structures to define heterogeneous parallel composition
                 formally. Morphisms between tag structures are used to
                 define relationships between heterogeneous models at
                 different levels of abstraction. In particular, they
                 can be used to represent design transformations from
                 tightly synchronized specifications to
                 loosely-synchronized implementations. The theory has an
                 important application in the correct-by-construction
                 deployment of synchronous design on distributed
                 architectures.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {43},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {Compositionality; correct-by-construction design;
                 GALS; models of computation; reactive systems},
}

@Article{Gebotys:2008:EAW,
  author =       {Gebotys, Catherine H. and White, Brian A.},
  title =        {{EM} analysis of a wireless {Java}-based {PDA}},
  journal =      j-TECS,
  volume =       {7},
  number =       {4},
  pages =        {44:1--44:??},
  month =        jul,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1376804.1376812},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Aug 5 19:32:59 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {The susceptibility of wireless portable devices to
                 electromagnetic (EM) attacks is largely unknown. If
                 analysis of electromagnetic (EM) waves emanating from
                 the wireless device during a cryptographic computation
                 do leak sufficient information, it may be possible for
                 an attacker to reconstruct the secret key. Possession
                 of the secret cryptographic key would render all future
                 wireless communications insecure and cause further
                 potential problems, such as identity theft. Despite the
                 complexities of a PDA wireless device, such as
                 operating system events, interrupts, cache misses, and
                 other interfering events, this article demonstrates
                 that, for the first time, repeatable EM differential
                 attacks are possible. The proposed differential
                 analysis methodology involves precharacterization of
                 the PDA device (thresholding and pattern recognition),
                 and a new frequency-based differential analysis. Unlike
                 previous research, the new methodology does not require
                 perfect alignment of EM frames and is repeatable in the
                 presence of a complex embedded system (including cache
                 misses, operating system events, etc), thus supporting
                 attacks on real embedded systems. This research is
                 important for future wireless embedded systems, which
                 will increasingly demand higher levels of security.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {44},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {countermeasures; EM analysis; power attacks;
                 Side-channel analysis},
}

@Article{Ayav:2008:IFT,
  author =       {Ayav, Tolga and Fradet, Pascal and Girault, Alain},
  title =        {Implementing fault-tolerance in real-time programs by
                 automatic program transformations},
  journal =      j-TECS,
  volume =       {7},
  number =       {4},
  pages =        {45:1--45:??},
  month =        jul,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1376804.1376813},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Aug 5 19:32:59 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {We present a formal approach to implement
                 fault-tolerance in real-time embedded systems. The
                 initial fault-intolerant system consists of a set of
                 independent periodic tasks scheduled onto a set of
                 fail-silent processors connected by a reliable
                 communication network. We transform the tasks such
                 that, assuming the availability of an additional spare
                 processor, the system tolerates one failure at a time
                 (transient or permanent). Failure detection is
                 implemented using heartbeating, and failure masking
                 using checkpointing and rollback. These techniques are
                 described and implemented by automatic program
                 transformations on the tasks' programs. The proposed
                 formal approach to fault-tolerance by program
                 transformations highlights the benefits of separation
                 of concerns. It allows us to establish correctness
                 properties and to compute optimal values of parameters
                 to minimize fault-tolerance overhead. We also present
                 an implementation of our method, to demonstrate its
                 feasibility and its efficiency.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {45},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {checkpointing; correctness proofs; Fault-tolerance;
                 heartbeating; program transformations},
}

@Article{Middha:2008:MMS,
  author =       {Middha, Bhuvan and Simpson, Matthew and Barua, Rajeev},
  title =        {{MTSS}: {Multitask} stack sharing for embedded
                 systems},
  journal =      j-TECS,
  volume =       {7},
  number =       {4},
  pages =        {46:1--46:??},
  month =        jul,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1376804.1376814},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Aug 5 19:32:59 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Out-of-memory errors are a serious source of
                 unreliability in most embedded systems. Applications
                 run out of main memory because of the frequent
                 difficulty of estimating the memory requirement before
                 deployment, either because it depends on input data, or
                 because certain language features prevent estimation.
                 The typical lack of disks and virtual memory in
                 embedded systems has a serious consequence when an
                 out-of-memory error occurs. Without swap space, the
                 system crashes if its memory footprint exceeds the
                 available memory by even 1 byte. This work improves
                 reliability for multitasking embedded systems by
                 proposing MTSS, a multitask stack sharing technique. If
                 a task attempts to overflow the bounds of its allocated
                 stack space, MTSS grows its stack into the stack memory
                 space allocated for other tasks. This technique can
                 avoid the out-of-memory error if the extra space
                 recovered is sufficient to complete execution.
                 Experiments show that MTSS is able to recover an
                 average of 54\% of the stack space allocated to the
                 overflowing task in the free space of other tasks. In
                 addition, unlike conventional systems, MTSS detects
                 memory overflows, allowing the possibility of remedial
                 action or a graceful exit if the recovered space is not
                 enough. Alternatively, MTSS can be used for decreasing
                 the required physical memory of an embedded system by
                 reducing the initial memory allocated to each of the
                 tasks and recovering the deficit by sharing stack with
                 other tasks. The overheads of MTSS are low: the runtime
                 and energy overheads are 3.1\% and 3.2\%, on average.
                 These are tolerable given that reliability is the most
                 important concern in virtually all systems, ahead of
                 other concerns, such as runtime and energy.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {46},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {cactus stack; data compression; heap overflow; meshed
                 stack; Out-of-memory errors; reliability; reuse;
                 runtime checks; stack overflow; virtual memory},
}

@Article{Inoue:2008:FAC,
  author =       {Inoue, Hiroaki and Sakai, Junji and Torii, Sunao and
                 Edahiro, Masato},
  title =        {{FIDES}: an advanced chip multiprocessor platform for
                 secure next generation mobile terminals},
  journal =      j-TECS,
  volume =       {8},
  number =       {1},
  pages =        {1:1--1:??},
  month =        dec,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1457246.1457247},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Jan 6 14:36:01 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {We propose a secure platform on a chip multiprocessor,
                 FIDES, in order to enable next generation mobile
                 terminals to execute downloaded native applications for
                 Linux. Its most important feature is the higher
                 security based on multigrained separation mechanisms.
                 Four new technologies support the FIDES platform: bus
                 filter logic, XIP kernels, policy separation, and
                 dynamic access control. With these technologies, the
                 FIDES platform can tolerate both application-level and
                 kernel-level bugs on an actual download subsystem.
                 Thus, the best-suited platform to secure next
                 generation mobile terminals is FIDES.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {1},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {chip multiprocessor; Secure mobile terminal; SELinux},
}

@Article{Park:2008:ATL,
  author =       {Park, Taejoon and Shin, Kang G.},
  title =        {Attack-tolerant localization via iterative
                 verification of locations in sensor networks},
  journal =      j-TECS,
  volume =       {8},
  number =       {1},
  pages =        {2:1--2:??},
  month =        dec,
  year =         {2008},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/1457246.1457248},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Jan 6 14:36:01 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {In sensor networks, secure localization ---
                 determining sensors' locations in a hostile, untrusted
                 environment --- is a challenging, but very important,
                 problem that has not yet been addressed effectively.
                 This paper presents an attack-tolerant localization
                 protocol, called {\em Verification for Iterative
                 Localization\/} (VeIL), under which sensors
                 cooperatively safeguard the localization service. By
                 exploiting the high spatiotemporal correlation existing
                 between adjacent nodes, VeIL realizes (a) adaptive
                 management of a profile for normal localization
                 behavior, and (b) distributed detection of false
                 locations advertised by attackers by comparing them
                 against the profile of normal behavior. Our analysis
                 and simulation results show that VeIL achieves
                 high-level tolerance to many critical attacks, and is
                 computationally feasible on resource-limited sensors.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {2},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  keywords =     {Anomaly detection; attack-tolerance; localization;
                 recursive least squares; sensor networks},
}

%%% NOTE: The average-dwell-time bound in the abstract is typeset in
%%% math mode as $\tau_a$.
@Article{Mitra:2008:VAD,
  author =       "Sayan Mitra and Daniel Liberzon and Nancy Lynch",
  title =        "Verifying average dwell time of hybrid systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1457246.1457249",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 6 14:36:01 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Average dwell time (ADT) properties characterize the
                 rate at which a hybrid system performs mode switches.
                 In this article, we present a set of techniques for
                 verifying ADT properties. The stability of a hybrid
                 system A can be verified by combining these techniques
                 with standard methods for checking stability of the
                 individual modes of A.\par

                 We introduce a new type of simulation relation for
                 hybrid automata --- {\em switching simulation\/} ---
                 for establishing that a given automaton A switches more
                 rapidly than another automaton B. We show that the
                 question of whether a given hybrid automaton has ADT
                 $\tau_a$ can be answered either by
                 checking an invariant or by solving an optimization
                 problem. For classes of hybrid automata for which
                 invariants can be checked automatically, the
                 invariant-based method yields an automatic method for
                 verifying ADT; for automata that are outside this
                 class, the invariant has to be checked using inductive
                 techniques. The optimization-based method is automatic
                 and is applicable to a restricted class of initialized
                 hybrid automata. A solution of the optimization problem
                 either gives a counterexample execution that violates
                 the ADT property, or it confirms that the automaton
                 indeed satisfies the property. The optimization and the
                 invariant-based methods can be used in combination to
                 find the unknown ADT of a given hybrid automaton.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Hybrid systems; optimization-based verification;
                 simulation relation",
}

%%% TECS 8(1), article 4 (December 2008): speed/accuracy trade-off of
%%% transaction level models, studied on AHB, CAN, and ColdFire buses.
@article{Schirner:2008:QAS,
  author =       {Gunar Schirner and Rainer D{\"o}mer},
  title =        {Quantitative analysis of the speed\slash accuracy
                 trade-off in transaction level modeling},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {8},
  number =       {1},
  articleno =    {4},
  pages =        {4:1--4:??},
  month =        dec,
  year =         {2008},
  doi =          {https://doi.org/10.1145/1457246.1457250},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {System level design; system-on-chip; transaction level
                 modeling},
  abstract =     {The increasing complexity of embedded systems requires
                 modeling at higher levels of abstraction. Transaction
                 level modeling (TLM) has been proposed to abstract
                 communication for high-speed system simulation and
                 rapid design space exploration. Although being widely
                 accepted for its high performance and efficiency, TLM
                 often exhibits a significant loss in model
                 accuracy.\par

                 In this article, we systematically analyze and quantify
                 the speed/accuracy trade-off in TLM. To this end, we
                 provide a classification of TLM abstraction levels
                 based on model granularity and define appropriate
                 metrics and test setups to quantitatively measure and
                 compare the performance and accuracy of such
                 models.\par

                 Addressing several classes of embedded communication
                 protocols, we apply our analysis to three common bus
                 architectures, the industry-standard AMBA advanced
                 high-performance bus (AHB) as an on-chip parallel bus,
                 the controller area network (CAN) as an off-chip serial
                 bus, and the Motorola ColdFire Master Bus as an example
                 for a custom embedded processor bus.\par

                 Based on the analysis of these individual busses, we
                 then generalize our results for a broader conclusion.
                 The general TLM trade-off offers gains of up to four
                 orders of magnitude in simulation speed, generally
                 however, at the price of low accuracy. We conclude
                 further that model granularity is the key to efficient
                 TLM abstraction, and we identify conditions for
                 accuracy of abstract models. As a result, this article
                 provides general guidelines that allow the system
                 designer to navigate the TLM trade-off effectively and
                 choose the most suitable model for the given
                 application with fast and accurate results.},
  bibdate =      {Tue Jan 6 14:36:01 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% TECS 8(1), article 5 (December 2008): compiler/OS/hardware scheme
%%% using a direct translation table (DTT) for low-energy virtual memory.
@article{Zhou:2008:DAT,
  author =       {Xiangrong Zhou and Peter Petrov},
  title =        {Direct address translation for virtual memory in
                 energy-efficient embedded systems},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {8},
  number =       {1},
  articleno =    {5},
  pages =        {5:1--5:??},
  month =        dec,
  year =         {2008},
  doi =          {https://doi.org/10.1145/1457246.1457251},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {address translation; Low-power embedded systems;
                 virtual memory},
  abstract =     {This article presents a methodology for virtual memory
                 support in energy-efficient embedded systems. A
                 holistic approach is proposed, where the combined
                 efforts of compiler, operating system, and hardware
                 architecture achieve a significant system power
                 reductions. The application information extracted and
                 analyzed by the compiler is utilized dynamically by the
                 microarchitecture and the operating system to perform
                 energy-efficient and, for many memory references,
                 time-deterministic address translations. We demonstrate
                 that by using application information regarding virtual
                 memory layout, an efficient and conflict-free
                 translation process can be implemented through the
                 utilization of a small hardware direct translation
                 table (DTT) accessed in an application-specific manner.
                 The set of virtual pages is partitioned into groups,
                 such that for each group only a few of the least
                 significant bits are used as an index to obtain the
                 physical page number. We outline an efficient
                 compile-time algorithm for identifying these groups and
                 allocate their translation entries optimally into the
                 DTT. The introduced hardware is minimal in terms of
                 area, performance, and power overhead, while offering
                 the flexibility of software programmability. This is
                 achieved through a small set of registers and tables,
                 which are made software accessible. We have
                 quantitatively evaluated the proposed methodology on a
                 number of embedded applications, including voice,
                 image, and video processing.},
  bibdate =      {Tue Jan 6 14:36:01 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% TECS 8(1), article 6 (December 2008): quasistatic shared libraries
%%% and XIP for MMU-less systems, built on a data section base
%%% register (DSBR) and data section base table (DSBT).
@article{Park:2008:QSL,
  author =       {Jiyong Park and Jaesoo Lee and Saehwa Kim and Seongsoo
                 Hong},
  title =        {Quasistatic shared libraries and {XIP} for memory
                 footprint reduction in {MMU}-less embedded systems},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {8},
  number =       {1},
  articleno =    {6},
  pages =        {6:1--6:??},
  month =        dec,
  year =         {2008},
  doi =          {https://doi.org/10.1145/1457246.1457252},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {Embedded systems; memory footprint reduction;
                 MMU-less; quasi-static linking; shared library},
  abstract =     {Despite a rapid decrease in the price of solid state
                 memory devices, system memory is still a very precious
                 resource in embedded systems. The use of shared
                 libraries and execution-in-place (XIP) is known to be
                 effective in significantly reducing memory usage.
                 Unfortunately, many resource-constrained embedded
                 systems lack an MMU, making it extremely difficult to
                 support these techniques. To address this problem, we
                 propose a novel shared library technique called a
                 quasi-static shared library and an XIP, both based on
                 our enhanced position independent code technique. In
                 our quasistatic shared libraries, global symbols are
                 bound to pseudoaddresses at linking time and actual
                 physical addresses are bound at loading time. Unlike
                 conventional shared libraries, they do not require
                 symbol tables that take up valuable memory space and,
                 therefore, allow for expedited address translation at
                 runtime. Our XIP technique is facilitated by our
                 enhanced position independent code where a data section
                 can be arbitrarily located. Both the shared library and
                 XIP techniques are made possible by emulating an MMU's
                 memory mapping feature with a data section base
                 register (DSBR) and a data section base table
                 (DSBT).\par

                 We have implemented these proposed techniques in a
                 commercial ADSL (Asymmetric Digital Subscriber Line)
                 home network gateway equipped with an MMU-less ARM7TDMI
                 processor core, 2MB flash memory, and 16MB RAM. We
                 measured its memory usage and evaluated its performance
                 overhead by conducting a series of experiments. These
                 experiments clearly demonstrate the effectiveness of
                 our techniques in reducing memory usage. The results
                 are impressive: 35\% reduction in flash memory usage
                 when using only the shared library and 30\% reduction
                 in RAM usage when using the shared library and XIP
                 together. These results were achieved with only a
                 negligible performance penalty of less than 4\%. Even
                 though these techniques were applied to uClinux-based
                 embedded systems, they can be used for any MMU-less
                 real-time operating system.},
  bibdate =      {Tue Jan 6 14:36:01 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% TECS 8(1), article 7 (December 2008): worst-case execution time
%%% analysis of instruction caches with prefetching, via an extended
%%% static cache simulation.
@article{Yan:2008:AWC,
  author =       {Jun Yan and Wei Zhang},
  title =        {Analyzing the worst-case execution time for
                 instruction caches with prefetching},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {8},
  number =       {1},
  articleno =    {7},
  pages =        {7:1--7:??},
  month =        dec,
  year =         {2008},
  doi =          {https://doi.org/10.1145/1457246.1457253},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {hard real-time; instruction caches; instruction
                 prefetching; Worst-case execution time analysis},
  abstract =     {Time predictability is one of the most important
                 design considerations for real-time systems. In this
                 article, we study the impact of instruction prefetching
                 on the worst-case performance of instruction caches. We
                 extend the static cache simulation technique to model
                 and compute the worst-case instruction cache
                 performance with prefetching. The evaluation results
                 show that instruction prefetching can benefit both the
                 average-case and worst-case performance; however, the
                 degree of the worst-case performance improvement due to
                 instruction prefetching is less than that of the
                 average-case performance. As a result, the time
                 variation of computing is increased by instruction
                 prefetching. Also, our experimental results indicate
                 that the prefetching distance can significantly impact
                 the worst-case performance of instruction caches with
                 instruction prefetching. Specifically, when the
                 prefetching distance is equal to the L1 miss penalty,
                 the worst-case execution time with instruction
                 prefetching is minimized.},
  bibdate =      {Tue Jan 6 14:36:01 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% TECS 8(1), article 8 (December 2008): time/energy overheads of a
%%% software TPM (SW-TPM), with ECC, custom instructions, and
%%% multiprocessor SoC parallelism evaluated as accelerations.
@article{Aaraj:2008:ADH,
  author =       {Najwa Aaraj and Anand Raghunathan and Niraj K. Jha},
  title =        {Analysis and design of a hardware\slash software
                 trusted platform module for embedded systems},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {8},
  number =       {1},
  articleno =    {8},
  pages =        {8:1--8:??},
  month =        dec,
  year =         {2008},
  doi =          {https://doi.org/10.1145/1457246.1457254},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {Custom instructions; embedded systems; multiprocessor
                 systems},
  abstract =     {Trusted platforms have been proposed as a promising
                 approach to enhance the security of general-purpose
                 computing systems. However, for many
                 resource-constrained embedded systems, the size and
                 cost overheads of a separate Trusted Platform Module
                 (TPM) chip are not acceptable. One alternative is to
                 use a software-based TPM, which implements TPM
                 functions using software that executes in a protected
                 execution domain on the embedded processor itself.
                 However, since many embedded systems have limited
                 processing capabilities and are battery-powered, it is
                 also important to ensure that the computational and
                 energy requirements for SW-TPMs are acceptable.\par

                 In this article, we perform an evaluation of the energy
                 and execution time overheads for a SW-TPM
                 implementation on a handheld appliance (Sharp Zaurus
                 PDA). We characterize the execution time and energy
                 required by each TPM command through actual
                 measurements on the target platform. We observe that
                 for most commands, overheads are primarily due to the
                 use of 2,048-bit RSA operations that are performed
                 within the SW-TPM. In order to alleviate SW-TPM
                 overheads, we evaluate the use of Elliptic Curve
                 Cryptography (ECC) as a replacement for the RSA
                 algorithm specified in the Trusted Computing Group
                 (TCG) standards. In addition, we also evaluate the
                 overheads of using the SW-TPM in the context of various
                 end applications, including trusted boot of the Linux
                 operating system (OS), a secure VoIP client, and a
                 secure Web browser. Furthermore, we analyze the
                 computational workload involved in running SW-TPM
                 commands using ECC. We then present a suite of hardware
                 and software enhancements to accelerate these commands
                 --- generic custom instructions and exploitation of
                 parallel processing capabilities in multiprocessor
                 systems-on-chip (SoCs). We report results of evaluating
                 the proposed architectures on a commercial embedded
                 processor (Xtensa from Tensilica). Through uniprocessor
                 and multiprocessor optimizations, we could achieve
                 speed-ups of up to 5.71X for individual TPM commands.},
  bibdate =      {Tue Jan 6 14:36:01 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% TECS 8(2), article 9 (January 2009): improved frequent value
%%% encoding (FVE) schemes that reduce switching on off-chip data buses.
@article{Suresh:2009:EEE,
  author =       {Dinesh C. Suresh and Banit Agrawal and Jun Yang and
                 Walid Najjar},
  title =        {Energy-efficient encoding techniques for off-chip data
                 buses},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {8},
  number =       {2},
  articleno =    {9},
  pages =        {9:1--9:??},
  month =        jan,
  year =         {2009},
  doi =          {https://doi.org/10.1145/1457255.1457256},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {bus switching; encoding; internal capacitances;
                 Low-power data buses},
  abstract =     {Reducing the power consumption of computing devices
                 has gained a lot of attention recently. Many research
                 works have focused on reducing power consumption in the
                 off-chip buses as they consume a significant amount of
                 total power. Since the bus power consumption is
                 proportional to the switching activity, reducing the
                 bus switching is an effective way to reduce bus power.
                 While numerous techniques exist for reducing bus power
                 in address buses, only a handful of techniques have
                 been proposed for data-bus power reduction, where
                 frequent value encoding (FVE) is the best existing
                 scheme to reduce the transition activity on the data
                 buses.\par

                 In this article, we propose improved frequent value
                 data bus-encoding techniques aimed at reducing more
                 switching activity and, hence, power consumption. We
                 propose three new schemes and five new variations to
                 exploit bit-wise temporal and spatial locality in the
                 data-bus values. Our techniques just use one external
                 control signal and capture bit-wise locality to
                 efficiently encode data values. For all the embedded
                 and SPEC applications we tested, the overall average
                 switching reduction is 53\% over unencoded data and
                 10\% more than the conventional FVE scheme.},
  bibdate =      {Thu Feb 5 19:15:05 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% TECS 8(2), article 10 (January 2009): loop-level parallelism
%%% (thread-level, speculative thread-level, vector) in EEMBC and
%%% MiBench applications.  The $^1$ and $^2$ markers in the abstract
%%% refer to the two trademark notes that close the abstract.
%%% NOTE(review): \reg is not a standard LaTeX macro; presumably it is
%%% defined in this file's preamble --- confirm before typesetting.
@Article{Kejariwal:2009:ELL,
  author =       "Arun Kejariwal and Alexander V. Veidenbaum and
                 Alexandru Nicolau and Milind Girkar and Xinmin Tian and
                 Hideki Saito",
  title =        "On the exploitation of loop-level parallelism in
                 embedded applications",
  journal =      j-TECS,
  volume =       "8",
  number =       "2",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1457255.1457257",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 5 19:15:05 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Advances in the silicon technology have enabled
                 increasing support for hardware parallelism in embedded
                 processors. Vector units, multiple processors/cores,
                 multithreading, special-purpose accelerators such as
                 DSPs or cryptographic engines, or a combination of the
                 above have appeared in a number of processors. They
                 serve to address the increasing performance
                 requirements of modern embedded applications. To what
                 extent the available hardware parallelism can be
                 exploited is directly dependent on the amount of
                 parallelism inherent in the given application and the
                 congruence between the granularity of hardware and
                 application parallelism. This paper discusses how
                 loop-level parallelism in embedded applications can be
                 exploited in hardware and software. Specifically, it
                 evaluates the efficacy of automatic loop
                 parallelization and the performance potential of
                 different types of parallelism, viz., true thread-level
                 parallelism (TLP), speculative thread-level parallelism
                 and vector parallelism, when executing loops.
                 Additionally, it discusses the interaction between
                 parallelization and vectorization. Applications from
                 both the industry-standard EEMBC{\reg},$^1$ 1.1, EEMBC
                 2.0 and the academic MiBench embedded benchmark suites
                 are analyzed using the Intel{\reg}$^2$ C compiler. The
                 results show the performance that can be achieved today
                 on real hardware and using a production compiler,
                 provide upper bounds on the performance potential of
                 the different types of thread-level parallelism, and
                 point out a number of issues that need to be addressed
                 to improve performance. The latter include
                 parallelization of libraries such as libc and design of
                 parallel algorithms to allow maximal exploitation of
                 parallelism. The results also point to the need for
                 developing new benchmark suites more suitable to
                 parallel compilation and execution.\par

                 $^1$ Other names and brands may be claimed as the
                 property of others.\par

                 $^2$ Intel is a trademark of Intel Corporation or its
                 subsidiaries in the United States and other
                 countries.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "libraries; Multi-cores; multithreading; parallel
                 loops; programming models; system-on-chip (SoC);
                 thread-level speculation; vectorization",
}

%%% TECS 8(2), article 11 (January 2009): task assignment for pipelined
%%% execution of streaming applications on multicore targets.
%%% The multiplication signs in the abstract are written in math mode
%%% ($ ... \times $) because \times is a math-only macro and would
%%% otherwise stop LaTeX with a "Missing $ inserted" error.
@Article{Hashemi:2009:TDS,
  author =       "Matin Hashemi and Soheil Ghiasi",
  title =        "Throughput-driven synthesis of embedded software for
                 pipelined execution on multicore architectures",
  journal =      j-TECS,
  volume =       "8",
  number =       "2",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1457255.1457258",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 5 19:15:05 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present a methodology for pipelined software
                 synthesis of streaming applications. First, we develop
                 a versatile task assignment algorithm capable of
                 optimizing realistically-arbitrary cost functions for
                 two cores. The algorithm is exact (i.e., theoretically
                 optimal) contrary to existing heuristics. Second, our
                 approximation technique provides an adjustable knob to
                 trade solution quality with algorithm runtime and
                 memory. Third, we develop a recursive heuristic for
                 more cores. FPGA-based emulated experiments validate
                 our theoretical results. The exact algorithm yields
                 $ 1.7 \times $ throughput improvement. The
                 approximation method offers a range of tradeoff points
                 (e.g., $ 3 \times $ faster with $ 20 \times $ less
                 memory) while degrading the throughput only 1\% to
                 5\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Embedded software; graph partitioning; multi-core
                 hardware; streaming applications; task assignment",
}

%%% TECS 8(2), article 12 (January 2009): high-level specification and
%%% design methodology for partially reconfigurable VLIW processors
%%% (rASIPs).  Author given names are recorded as initials only.
@article{Chattopadhyay:2009:PPA,
  author =       {A. Chattopadhyay and H. Ishebabi and X. Chen and Z.
                 Rakosi and K. Karuri and D. Kammler and R. Leupers and
                 G. Ascheid and H. Meyr},
  title =        {Pre- and postfabrication architecture exploration for
                 partially reconfigurable {VLIW} processors},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {8},
  number =       {2},
  articleno =    {12},
  pages =        {12:1--12:??},
  month =        jan,
  year =         {2009},
  doi =          {https://doi.org/10.1145/1457255.1457259},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {ASIP; coarse-grained FPGA; VLIW},
  abstract =     {Modern application-specific instruction-set processors
                 (ASIPs) face the daunting task of delivering high
                 performance for a wide range of applications. For
                 enhancing the performance, architectural features
                 (e.g., pipelining, VLIW) are often employed in ASIPs,
                 leading to high design complexity. Integrated ASIP
                 design environments like template-based approaches and
                 language-driven approaches provide an answer to this
                 growing design complexity. At the same time, increasing
                 hardware design costs have motivated the processor
                 designers to introduce high flexibility in the
                 processor. Flexibility, in its most effective form, can
                 be introduced to the ASIP by coupling a reconfigurable
                 unit to the base processor. Due to its obvious
                 benefits, several reconfigurable ASIPs (rASIPs) have
                 been designed for years. This design paradigm gained
                 momentum with the advent of coarse-grained FPGAs, where
                 the lack of domain-specific performance common in
                 general-purpose FPGAs are largely overcome by choosing
                 application-dependent basic functional units. These
                 rASIP designs lack a generic flow from high-level
                 specification, resulting into intuitive design
                 decisions and hard-to-retarget processor design tools.
                 Although partial, template-based approaches for rASIP
                 design is existent, a clear design methodology
                 especially for the prefabrication architecture
                 exploration is not present. In order to address this
                 issue, a high-level specification and design
                 methodology for partially reconfigurable VLIW
                 processors is proposed in this article. To show the
                 benefit of this approach a commercial VLIW processor is
                 used as the base architecture and two domains of
                 applications are studied for potential performance
                 gain.},
  bibdate =      {Thu Feb 5 19:15:05 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% TECS 8(2), article 13 (January 2009): continuous-time Markov chain
%%% and Petri net analysis of core-centric network processors, based on
%%% a VPN prototype on the IXP425.
@article{Lin:2009:MAC,
  author =       {Yi-Neng Lin and Ying-Dar Lin and Kuo-Kun Tseng and
                 Yuan-Cheng Lai},
  title =        {Modeling and analysis of core-centric network
                 processors},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {8},
  number =       {2},
  articleno =    {13},
  pages =        {13:1--13:??},
  month =        jan,
  year =         {2009},
  doi =          {https://doi.org/10.1145/1457255.1457260},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  keywords =     {core-centric; embedded system; modeling; Network
                 processor; simulation},
  abstract =     {Network processors can be categorized into two types,
                 the coprocessors-centric model in which the data-plane
                 is handled by coprocessors, and the core-centric model
                 in which the core processes most of the data-plane
                 packets yet offloading some tasks to coprocessors.
                 While the former has been properly explored over
                 various applications, researches regarding the latter
                 remain limited. Based on the previous experience of
                 prototyping the virtual private network (VPN) over the
                 IXP425 network processor, this work aims to derive
                 design implications for the core-centric model
                 performing computational intensive applications. From
                 system and IC vendors' perspectives, the
                 continuous-time Markov chain and Petri net simulations
                 are adopted to explore this architecture. Analytical
                 results prove to be quite inline with those of the
                 simulation and implementation. With subsequent
                 investigation we find that appropriate process run
                 lengths can improve the effective core utilization by
                 2.26 times, and by offloading the throughput boosts 7.5
                 times. The results also suggest single process
                 programming since context switch overhead impacts
                 considerably on the performance.},
  bibdate =      {Thu Feb 5 19:15:05 MST 2009},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhou:2009:CLC,
  author = {Xiangrong Zhou and Peter Petrov},
  title = {Cross-layer customization for rapid and low-cost task
    preemption in multitasked embedded systems},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  volume = {8},
  number = {2},
  pages = {14:1--14:??},
  articleno = {14},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457261},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  journal-url = {https://dl.acm.org/loi/tecs},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Preemptive multitasking is widely used in many
    low-cost and real-time embedded applications for its
    superior hardware utilization. The frequent and
    asynchronous context switches, however, require the
    preservation and restoration of the task state, thus
    resulting in a large number of memory transfer
    instructions. As a consequence, task responsiveness and
    application throughput can be significantly
    deteriorated. To address this problem we propose a
    cross-layer customization framework which through the
    close cooperation of compiler, OS, and hardware
    architecture achieves rapid and low-cost task switch.
    Application information extracted during compile-time
    regarding state liveness is exploited in order to
    preserve a minimal amount of task state on task
    preemption. We introduce two complementary techniques
    to implement the application-aware state preservation.
    The first technique utilizes compiler-generated custom
    routines which preserve/restore an extremely small live
    context at judiciously selected points in the
    application code. The second technique requires more
    sophisticated hardware support. It employs an
    OS-controlled register file mapping to achieve a rapid
    context switch. By mapping a small fraction of the
    register file in a single clock cycle, a context switch
    is achieved requiring no memory transfers for the
    majority of cases to preserve/restore the live state.
    The effect of aggressively replicated register files,
    where each task is given its own replica, is achieved
    with the hardware cost of only adding from 25\% to 50\%
    extra physical registers. Through the utilization of
    these novel mechanisms, a significant improvement on
    task response time is achieved as the context-switch
    cost is minimized.},
}

@article{Auerbach:2009:LLT,
  author = {Joshua Auerbach and David F. Bacon and Daniel Iercan
    and Christoph M. Kirsch and V. T. Rajan and Harald
    R{\"o}ck and Rainer Trummer},
  title = {Low-latency time-portable real-time programming with
    {Exotasks}},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  volume = {8},
  number = {2},
  pages = {15:1--15:??},
  articleno = {15},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457262},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  journal-url = {https://dl.acm.org/loi/tecs},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  keywords = {Real-time scheduling; time portability; UAVs; virtual
    machine},
  abstract = {{\em Exotasks\/} are a novel Java programming
    construct that achieve three important goals. They
    achieve low latency while allowing the fullest use of
    Java language features, compared to previous attempts
    to restrict the Java language for use in the
    submillisecond domain. They support pluggable
    schedulers, allowing easy implementation of new
    scheduling paradigms in a real-time Java system. They
    can achieve deterministic timing, even in the presence
    of other Java threads, and across changes of hardware
    and software platform. To achieve these goals, the
    program is divided into tasks with private heaps. Tasks
    may be strongly isolated, communicating only with each
    other and guaranteeing determinism, or weakly isolated,
    allowing some communication with the rest of the Java
    application. Scheduling of the tasks' execution,
    garbage collection, and value passing is accomplished
    by the pluggable scheduler. Schedulers that we have
    written employ logical execution time (LET) in
    association with strong isolation to achieve time
    portability. We have also built a quad-rotor model
    helicopter, the JAviator, which we use to evaluate our
    implementation of Exotasks in an experimental embedded
    version of IBM's J9 real-time virtual machine. Our
    experiments show that we are able to maintain very low
    scheduling jitter and deterministic behavior in the
    face of variations in both software load and hardware
    platform. We also show that Exotasks perform nearly as
    well as Eventrons on a benchmark audio application.},
}

@article{Ahn:2009:RCT,
  author = {Minwook Ahn and Yunheung Paek},
  title = {Register coalescing techniques for heterogeneous
    register architecture with copy sifting},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  volume = {8},
  number = {2},
  pages = {16:1--16:??},
  articleno = {16},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457263},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  journal-url = {https://dl.acm.org/loi/tecs},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  keywords = {compiler; embedded processors; heterogeneous register
    architecture; Register allocation; register
    coalescing},
  abstract = {Optimistic coalescing has been proven as an elegant
    and effective technique that provides better chances of
    safely coloring more registers in register allocation
    than other coalescing techniques. Its algorithm
    originally assumes homogeneous registers, which are all
    gathered in the same register file. Although this
    register architecture is still common in most
    general-purpose processors, embedded processors often
    contain heterogeneous registers, which are scattered in
    physically different register files dedicated for each
    dissimilar purpose and use. In this work, we show that
    optimistic coalescing is also useful for an embedded
    processor to better handle such heterogeneity of the
    register architecture, and developed a modified
    algorithm for optimal coalescing that helps a register
    allocator. In the experiment, an existing register
    allocator was able to achieve up to 13.0\% reduction in
    code size through our coalescing, and avoid many spills
    that would have been generated without our scheme.},
}

@article{Mozumdar:2009:CSP,
  author = {Mohammad Mostafizur Rahman Mozumdar and Luciano
    Lavagno and Laura Vanzago},
  title = {A comparison of software platforms for wireless sensor
    networks: {MANTIS}, {TinyOS}, and {ZigBee}},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  volume = {8},
  number = {2},
  pages = {17:1--17:??},
  articleno = {17},
  month = jan,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1457255.1457264},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  journal-url = {https://dl.acm.org/loi/tecs},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  keywords = {application porting; software platform; Wireless
    sensor networks},
  abstract = {Wireless sensor networks are characterized by very
    tight code size and power constraints and by a lack of
    well-established standard software development
    platforms such as Posix. In this article, we present a
    comparative study between a few fairly different such
    platforms, namely MANTIS, TinyOS, and ZigBee, when
    considering them from the application developer's
    perspective, that is, by focusing mostly on functional
    aspects, rather than on performance or code size. In
    other words, we compare both the tasking model used by
    these platforms and the API libraries they offer.
    Sensor network applications are basically event based,
    so most of the software platforms are also built on
    considering event handling mechanism, however some use
    a more traditional thread based model. In this article,
    we consider implementations of a simple generic
    application in MANTIS, TinyOS, and the Ember ZigBee
    development framework, with the goal of depicting major
    differences between these platforms, and suggesting a
    programming style aimed at maximizing portability
    between them.},
}

@Article{Unnikrishnan:2009:RMR,
  author =       "P. Unnikrishnan and G. Chen and M. Kandemir and M.
                 Karakoy and I. Kolcu",
  title =        "Reducing memory requirements of resource-constrained
                 applications",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "17:1--17:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1509288.1509289",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded computing platforms are often resource
                 constrained, requiring great design and implementation
                 attention to memory-, power-, and heat-related
                 parameters. An important task for a compiler in such
                 platforms is to simplify the process of developing
                 applications for limited memory devices and
                 resource-constrained clients. Focusing on
                 array-intensive embedded applications to be executed on
                 single CPU-based architectures, this work explores how
                 loop-based compiler optimizations can be used for
                 increasing memory location reuse. Our goal is to
                 transform a given application in such a way that the
                 resulting code has fewer cases (as compared to the
                 original code), where the lifetimes of array elements
                 overlap. The reduction in lifetimes of array elements
                 can then be exploited by reusing memory locations as
                 much as possible. Our experimental results indicate
                 that the proposed strategy reduces data space
                 requirements of 15 resource constrained applications by
                 more than 40\%, on average. We also demonstrate how
                 this strategy can be combined with data locality (cache
                 behavior)--enhancing techniques so that a compiler can
                 take advantage of both, that is, reduce data memory
                 requirements and improve data locality at the same
                 time.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "compilers; embedded system; lifetime; Memory; reuse",
}

@article{Weng:2009:AMN,
  author = {Ning Weng and Tilman Wolf},
  title = {Analytic modeling of network processors for parallel
    workload mapping},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  volume = {8},
  number = {3},
  pages = {18:1--18:??},
  articleno = {18},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509290},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  journal-url = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  keywords = {Application profiling; embedded systems;
    multiprocessor scheduling; network processors},
  abstract = {Network processors are heterogeneous system-on-chip
    multiprocessors that are optimized to perform packet
    forwarding and processing tasks at Gigabit data rates.
    To meet the performance demands of increasing link
    speeds and complex network applications, network
    processors are implemented with several dozen embedded
    processor cores and hardware accelerators that run
    multiple packet processing applications in parallel.
    The parallel nature of the processing system makes it
    increasingly difficult for application developers to
    understand and manage resources and map processing
    tasks to the hardware. To address this problem, we
    present a methodology for profiling and analyzing
    network processor applications, mapping processing
    tasks to a generalized network processor architecture,
    and analytically determining the expected throughput
    performance. The key novelty of this work is not only
    the adaptation of application analysis and mapping
    algorithms to heterogeneous network processors, but
    also that the entire process can be automated and
    hidden from the application developer. Starting with
    the analysis of a uniprocessor implementation of the
    application, the process yields a mapping of the
    partitioned application that shows best performance for
    a given network processor system. The simplicity of the
    proposed randomized mapping algorithm allows the use of
    this methodology in network processor runtime systems
    where dynamic reallocation of tasks is necessary but
    processing power is limited. We present results that
    show the effectiveness of the analysis and mapping
    methodology as well as its application to design space
    exploration.},
}

@article{Tseng:2009:FSA,
  author = {Kuo-Kun Tseng and Yuan-Cheng Lai and Ying-Dar Lin and
    Tsern-Huei Lee},
  title = {A fast scalable automaton-matching accelerator for
    embedded content processors},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  volume = {8},
  number = {3},
  pages = {19:1--19:??},
  articleno = {19},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509291},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  journal-url = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  keywords = {Aho--Corasick; automaton; Bloom filter; content
    filtering; String matching},
  abstract = {Home and office network gateways often employ a
    cost-effective embedded network processor to handle
    their network services. Such network gateways have
    received strong demand for applications dealing with
    intrusion detection, keyword blocking, antivirus and
    antispam. Accordingly, we were motivated to propose an
    appropriate fast scalable automaton-matching (FSAM)
    hardware to accelerate the embedded network processors.
    Although automaton matching algorithms are robust with
    deterministic matching time, there is still plenty of
    room for improving their average-case performance. FSAM
    employs novel prehash and root-index techniques to
    accelerate the matching for the nonroot states and the
    root state, respectively, in automation based hardware.
    The prehash approach uses some hashing functions to
    pretest the input substring for the nonroot states
    while the root-index approach handles multiple bytes in
    one single matching for the root state. Also, FSAM is
    applied in a prevalent automaton algorithm,
    Aho--Corasick (AC), which is often used in many
    content-filtering applications. When implemented in
    FPGA, FSAM can perform at the rate of 11.1Gbps with the
    pattern set of 32,634 bytes, demonstrating that our
    proposed approach can use a small logic circuit to
    achieve a competitive performance, although a larger
    memory is used. Furthermore, the amount of patterns in
    FSAM is not limited by the amount of internal circuits
    and memories. If the high-speed external memories are
    employed, FSAM can support up to 21,302 patterns while
    maintaining similar high performance.},
}

@article{Reshadi:2009:HCS,
  author = {Mehrdad Reshadi and Prabhat Mishra and Nikil Dutt},
  title = {Hybrid-compiled simulation: an efficient technique for
    instruction-set architecture simulation},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  volume = {8},
  number = {3},
  pages = {20:1--20:??},
  articleno = {20},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509292},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  journal-url = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  keywords = {Compiled simulation; instruction set architecture;
    interpretive simulation; partial evaluation},
  abstract = {Instruction-set simulators are critical tools for the
    exploration and validation of new processor
    architectures. Due to the increasing complexity of
    architectures and time-to-market pressure, performance
    is the most important feature of an instruction-set
    simulator. Interpretive simulators are flexible but
    slow, whereas compiled simulators deliver speed at the
    cost of flexibility and compilation overhead. This
    article presents a hybrid instruction-set-compiled
    simulation (HISCS) technique for generation of fast
    instruction-set simulators that combines the benefit of
    both compiled and interpretive simulation. This article
    makes two important contributions: (i) it improves the
    interpretive simulation performance by applying
    compiled simulation at the instruction level using a
    novel template-customization technique to generate
    optimized decoded instructions during compile time; and
    (ii) it reduces the compile-time overhead by combining
    the benefits of both static and dynamic-compiled
    simulation. Our experimental results using two
    contemporary processors (ARM7 and SPARC) demonstrate an
    order-of-magnitude reduction in compilation time as
    well as a 70\% performance improvement, on average,
    over the best-known published result in instruction-set
    simulation.},
}

@article{Nguyen:2009:MAE,
  author = {Nghi Nguyen and Angel Dominguez and Rajeev Barua},
  title = {Memory allocation for embedded systems with a
    compile-time-unknown scratch-pad size},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  volume = {8},
  number = {3},
  pages = {21:1--21:??},
  articleno = {21},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509293},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  journal-url = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  keywords = {compiler; data linked list; downloadable codes;
    embedded loading; embedded systems; Memory allocation;
    scratch-pad},
  abstract = {This article presents the first memory allocation
    scheme for embedded systems having a scratch-pad memory
    whose size is unknown at compile time. A scratch-pad
    memory (SPM) is a fast compiler-managed SRAM that
    replaces the hardware-managed cache. All existing
    memory allocation schemes for SPM require the SPM size
    to be known at compile time. Unfortunately, because of
    this constraint, the resulting executable is tied to
    that size of SPM and is not portable to other processor
    implementations having a different SPM size.
    Size-portable code is valuable when programs are
    downloaded during deployment either via a network or
    portable media. Code downloads are used for fixing bugs
    or for enhancing functionality. The presence of
    different SPM sizes in different devices is common
    because of the evolution in VLSI technology across
    years. The result is that SPM cannot be used in such
    situations with downloaded codes.\par

    To overcome this limitation, our work presents a
    compiler method whose resulting executable is portable
    across SPMs of any size. Our technique is to employ a
    customized installer software, which decides the SPM
    allocation just before the program's first run, since
    the SPM size can be discovered at that time. The
    installer then, based on the decided allocation,
    modifies the program executable accordingly. The
    resulting executable places frequently used objects in
    SPM, considering both code and data for placement. To
    keep the overhead low, much of the preprocessing for
    the allocation is done at compile time. Results show
    that our benchmarks average a 41\% speedup versus an
    all-DRAM allocation, while the optimal static
    allocation scheme, which knows the SPM size at compile
    time and is thus an unachievable upper-bound and is
    only slightly faster (45\% faster than all-DRAM).
    Results also show that the overhead from our customized
    installer averages about 1.5\% in code size, 2\% in
    runtime, and 3\% in compile time for our benchmarks.},
}

@Article{Lysecky:2009:DIM,
  author =       "Roman Lysecky and Frank Vahid",
  title =        "Design and implementation of a {MicroBlaze}-based warp
                 processor",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "22:1--22:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1509288.1509294",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "While soft processor cores provided by FPGA vendors
                 offer designers with increased flexibility, such
                 processors typically incur penalties in performance and
                 energy consumption compared to hard processor core
                 alternatives. The recently developed technology of warp
                 processing can help reduce those penalties. Warp
                 processing is the dynamic and transparent
                 transformation of critical software regions from
                 microprocessor execution to much faster circuit
                 execution on an FPGA. In this article, we describe an
                 implementation of a warp processor on a Xilinx
                 Virtex-II Pro and Spartan3 FPGAs incorporating one or
                 more MicroBlaze soft processor cores. We further
                 provide a detailed analysis of the energy overhead of
                 dynamically partitioning an application's kernels to
                 hardware executing within an FPGA. Considering an
                 implementation that periodically partitions the
                 executing application once every minute, a
                 MicroBlaze-based warp processor implemented on a
                 Spartan3 FPGA achieves average speedups of $ 5.8 \times $
                 and energy reductions of 49\% compared to the
                 MicroBlaze soft processor core alone --- providing
                 competitive performance and energy consumption compared
                 to existing hard processor cores.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "configurable logic; dynamic optimization; FPGA;
                 hardware/software partitioning; just-in-time (JIT)
                 compilation; soft processor cores; Warp processors",
}

@article{Bai:2009:MME,
  author = {Lan S. Bai and Lei Yang and Robert P. Dick},
  title = {{MEMMU}: {Memory} expansion for {MMU}-less embedded
    systems},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  volume = {8},
  number = {3},
  pages = {23:1--23:??},
  articleno = {23},
  month = apr,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1509288.1509295},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  journal-url = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  keywords = {Data compression; embedded system; wireless sensor
    network},
  abstract = {Random access memory (RAM) is tightly constrained in
    the least expensive, lowest-power embedded systems such
    as sensor network nodes and portable consumer
    electronics. The most widely used sensor network nodes
    have only 4 to 10KB of RAM and do not contain memory
    management units (MMUs). It is difficult to implement
    complex applications under such tight memory
    constraints. Nonetheless, price and power-consumption
    constraints make it unlikely that increases in RAM in
    these systems will keep pace with the increasing memory
    requirements of applications.\par

    We propose the use of automated compile-time and
    runtime techniques to increase the amount of usable
    memory in MMU-less embedded systems. The proposed
    techniques do not increase hardware cost, and require
    few or no changes to existing applications. We have
    developed runtime library routines and compiler
    transformations to control and optimize the automatic
    migration of application data between compressed and
    uncompressed memory regions, as well as a fast
    compression algorithm well suited to this application.
    These techniques were experimentally evaluated on
    Crossbow TelosB sensor network nodes running a number
    of data-collection and signal-processing applications.
    Our results indicate that available memory can be
    increased by up to 50\% with less than 10\% performance
    degradation for most benchmarks.},
}

@Article{Doblander:2009:NSF,
  author = {Andreas Doblander and Andreas Zoufal and Bernhard Rinner},
  title = {A novel software framework for embedded multiprocessor smart
    cameras},
  journal = j-TECS,
  volume = {8},
  number = {3},
  pages = {24:1--24:??},
  month = apr,
  year = {2009},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1509288.1509296},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue Apr 21 16:29:24 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Distributed smart cameras (DSC) are an emerging technology for
    a broad range of important applications including smart rooms,
    surveillance, entertainment, tracking, and motion analysis. By having
    access to many views and through cooperation among the individual
    cameras, these DSCs have the potential to realize many more complex and
    challenging applications than single-camera systems.\par

    This article focuses on the system-level software required for efficient
    streaming applications on single smart cameras as well as on networks of
    DSCs. Embedded platforms with limited resources do not provide
    middleware services well known on general-purpose platforms. Our
    software framework supports transparent intra- and interprocessor
    communication while keeping the memory and computation overhead very
    low. The software framework is based on a publisher--subscriber
    architecture and provides mechanisms for dynamically loading and
    unloading software components as well as for graceful degradation in
    case of software- and hardware-related faults. The software framework
    has been completely implemented and tested on our embedded smart cameras
    consisting of an ARM-based network processor and several digital signal
    processors. Two case studies demonstrate the feasibility of our
    approach.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {24},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {distributed embedded systems; fault tolerance;
    publisher--subscriber; Smart cameras; video surveillance},
}

@Article{Li:2009:ELC,
  author = {Zhiyuan Li and Santosh Pande},
  title = {Editorial: {Languages}, compilers, and tools for embedded
    systems},
  journal = j-TECS,
  volume = {8},
  number = {4},
  pages = {25:1--25:??},
  month = jul,
  year = {2009},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1550987.1550988},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {25},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Murray:2009:CTI,
  author = {Alastair C. Murray and Richard V. Bennett and Bj{\"o}rn Franke
    and Nigel Topham},
  title = {Code transformation and instruction set extension},
  journal = j-TECS,
  volume = {8},
  number = {4},
  pages = {26:1--26:??},
  month = jul,
  year = {2009},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1550987.1550989},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The demand for flexible embedded solutions and short
    time-to-market has led to the development of extensible processors that
    allow for customization through user-defined instruction set extensions
    (ISEs). These are usually identified from plain C sources. In this
    article, we propose a combined exploration of code transformations and
    ISE identification. The resulting performance of such a combination has
    been measured on two benchmark suites. Our results demonstrate that
    combined code transformations and ISEs can yield average performance
    improvements of 49\%. This outperforms ISEs when applied in isolation,
    and in extreme cases yields a speed-up of 2.85.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {26},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {ASIPs; compilers; Customizable processors; design space
    exploration; instruction set extension; source-level transformations},
}

@Article{Hu:2009:CAS,
  author = {Jie Hu and Feihui Li and Vijay Degalahal and Mahmut Kandemir and
    N. Vijaykrishnan and Mary J. Irwin},
  title = {Compiler-assisted soft error detection under performance and
    energy constraints in embedded systems},
  journal = j-TECS,
  volume = {8},
  number = {4},
  pages = {27:1--27:??},
  month = jul,
  year = {2009},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1550987.1550990},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Soft errors induced by terrestrial radiation are becoming a
    significant concern in architectures designed in newer technologies. If
    left undetected, these errors can result in catastrophic consequences or
    costly maintenance problems in different embedded applications. In this
    article, we focus on utilizing the compiler's help in duplicating
    instructions for error detection in VLIW datapaths. The instruction
    duplication mechanism is further supported by a hardware enhancement for
    efficient result verification, which avoids the need of additional
    comparison instructions. In the proposed approach, the compiler
    determines the instruction schedule by balancing the permissible
    performance degradation and the energy constraint with the required
    degree of duplication. Our experimental results show that our algorithms
    allow the designer to perform trade-off analysis between performance,
    reliability, and energy consumption.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {27},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {compilers; Embedded systems; energy consumption; instruction
    duplication; reliability; soft errors},
}

@Article{Jafari:2009:EPR,
  author = {Roozbeh Jafari and Hassan Ghasemzadeh and Foad Dabiri and Ani
    Nahapetian and Majid Sarrafzadeh},
  title = {An efficient placement and routing technique for fault-tolerant
    distributed embedded computing},
  journal = j-TECS,
  volume = {8},
  number = {4},
  pages = {28:1--28:??},
  month = jul,
  year = {2009},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1550987.1550991},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article presents an efficient technique for placement and
    routing of sensors/actuators and processing units in a grid network. The
    driver application that we present is a medical jacket, which requires
    an extremely high level of robustness and fault tolerance. The power
    consumption of such jacket is another key technological constraint. Our
    proposed interconnection network is a mesh of wires. A jacket made of
    fabric and wires would be susceptible to accidental damage via tears. By
    modeling the tears, we evaluate the probability of having failures on
    every segment of wires in our mesh interconnection network. Then, we
    study two problems of placement and routing in the sensor networks such
    that the fault tolerance is maximized while the power consumption is
    minimized. We develop efficient integer linear programming (ILP)
    formulations to address these problems and perform both placement and
    routing, simultaneously. This ensures that the solution is a lower bound
    for both problems. We evaluate the effectiveness of our proposed
    techniques on a variety of benchmarks.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {28},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {Distributed embedded system; fault tolerance; placement;
    routing; sensor networks},
}

@Article{Lee:2009:CIA,
  author = {Edward A. Lee and Xiaojun Liu and Stephen Neuendorffer},
  title = {Classes and inheritance in actor-oriented design},
  journal = j-TECS,
  volume = {8},
  number = {4},
  pages = {29:1--29:??},
  month = jul,
  year = {2009},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1550987.1550992},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Actor-oriented components emphasize concurrency and temporal
    semantics and are used for modeling and designing embedded software and
    hardware. Actors interact with one another through ports via a messaging
    schema that can follow any of several concurrent semantics.
    Domain-specific actor-oriented languages and frameworks are common
    (Simulink, LabVIEW, SystemC, etc.). However, they lack many modularity
    and abstraction mechanisms that programmers have become accustomed to in
    object-oriented components, such as classes, inheritance, interfaces,
    and polymorphism, except as inherited from the host language. This
    article shows a form that such mechanisms can take in actor-oriented
    components, gives a formal structure, and describes a prototype
    implementation. The mechanisms support actor-oriented class definitions,
    subclassing, inheritance, and overriding. The formal structure imposes
    structural constraints on a model (mainly the ``derivation invariant'')
    that lead to a policy to govern inheritance. In particular, the
    structural constraints permit a disciplined form of multiple inheritance
    with unambiguous inheritance and overriding behavior. The policy is
    based formally on a generalized ultrametric space with some remarkable
    properties. In this space, inheritance is favored when actors are
    ``closer'' (in the generalized ultrametric), and we show that when
    inheritance can occur from multiple sources, one source is always
    unambiguously closer than the other.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {29},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {Actors; components; generalized ultrametric; inheritance;
    interfaces; overriding; type systems},
}

@Article{Riccobene:2009:SCB,
  author = {Elvinia Riccobene and Patrizia Scandurra and Sara Bocchio and
    Alberto Rosti and Luigi Lavazza and Luigi Mantellini},
  title = {{SystemC\slash C-based} model-driven design for embedded
    systems},
  journal = j-TECS,
  volume = {8},
  number = {4},
  pages = {30:1--30:??},
  month = jul,
  year = {2009},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1550987.1550993},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article summarizes our effort, since 2004 up to the
    present time, for improving the current industrial Systems-on-Chip and
    Embedded Systems design by joining the capabilities of the unified
    modeling language (UML) and SystemC/C programming languages to operate
    at system-level. The proposed approach exploits the OMG model-driven
    architecture --- a framework for Model-driven Engineering ---
    capabilities of reducing abstract, coarse-grained and
    platform-independent system models to fine-grained and platform-specific
    models. We first defined a design methodology and a development flow for
    the hardware, based on a SystemC UML profile and encompassing different
    levels of abstraction. We then included a multithread C UML profile for
    modelling software applications. Both SystemC/C profiles are consistent
    sets of modelling constructs designed to lift the programming features
    (both structural and behavioral) of the two coding languages to the UML
    modeling level. The new codesign flow is supported by an environment,
    which allows system modeling at higher abstraction levels (from a
    functional executable level to a register transfer level) and supports
    automatic code-generation/back-annotation from/to UML models.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {30},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {C; ES; MDE; SoC; SystemC; UML},
}

@Article{Bini:2009:MCE,
  author = {Enrico Bini and Giorgio Buttazzo and Giuseppe Lipari},
  title = {Minimizing {CPU} energy in real-time systems with discrete speed
    management},
  journal = j-TECS,
  volume = {8},
  number = {4},
  pages = {31:1--31:??},
  month = jul,
  year = {2009},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1550987.1550994},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article presents a general framework to analyze and
    design embedded systems minimizing the energy consumption without
    violating timing requirements. A set of realistic assumptions is
    considered in the model in order to apply the results in practical
    real-time applications. The processor is assumed to have as a set of
    discrete operating modes, each characterized by speed and power
    consumption. The energy overhead and the transition delay incurred
    during mode switches are considered. Task computation times are modeled
    with a part that scales with the speed and a part having a fixed
    duration, to take I/O operations into account.\par

    The proposed method allows to compute the optimal sequence of
    voltage/speed changes that approximates the minimum continuous speed,
    which guarantees the feasibility of a given set of real-time tasks,
    without violating the deadline constraints. The analysis is performed
    both under fixed and dynamic priority assignments.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {31},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {CPU energy; Real-time systems},
}

@Article{Koo:2009:FTG,
  author = {Heon-Mo Koo and Prabhat Mishra},
  title = {Functional test generation using design and property
    decomposition techniques},
  journal = j-TECS,
  volume = {8},
  number = {4},
  pages = {32:1--32:??},
  month = jul,
  year = {2009},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1550987.1550995},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Functional verification of microprocessors is one of the most
    complex and expensive tasks in the current system-on-chip design
    methodology. Simulation using functional test vectors is the most widely
    used form of processor validation. A significant bottleneck in the
    validation of such systems is the lack of automated techniques for
    directed test generation. While existing model checking--based
    approaches have proposed several promising ideas for automated test
    generation, many challenges remain in applying them to industrial
    microprocessors. The time and resources required for test generation
    using existing model checking--based techniques can be prohibitively
    large. This article presents an efficient test generation technique
    using decompositional model checking. The contribution of the article is
    the development of both property and design decomposition procedures for
    efficient test generation of pipelined processors. Our experimental
    results using a multi-issue MIPS processor and an industrial processor
    based on Power Architecture\TM{} Technology demonstrate several
    orders-of-magnitude reduction in validation effort by drastically
    reducing both test generation time and test program length.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {32},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  keywords = {design decomposition; functional validation; Model checking;
    pipelined processor; property decomposition; test generation},
}

@Article{Plaks:2009:GECa,
  author =       "Toomas P. Plaks and Neil Bergmann and Bernard
                 Pottier",
  title =        "Guest editorial {CAPA'08} configurable computing:
                 {Configuring} algorithms, processes, and architecture
                 issue {I}: {Configuring} algorithms and processes",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "1:1--1:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596532.1596533",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ferri:2009:RIF,
  author =       "B. H. Ferri and A. A. Ferri",
  title =        "Reconfiguration of {IIR} filters in response to
                 computer resource availability",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "2:1--2:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596532.1596534",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2009:TTD,
  author =       "Xiaojun Wang and Miriam Leeser",
  title =        "A truly two-dimensional systolic array {FPGA}
                 implementation of {QR} decomposition",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "3:1--3:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596532.1596535",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{DoCarmoLucas:2009:ADF,
  author =       "Amilcar {Do Carmo Lucas} and Henning Sahlbach and Sean
                 Whitty and Sven Heithecker and Rolf Ernst",
  title =        "Application development with the {FlexWAFE} real-time
                 stream processing architecture for {FPGAs}",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "4:1--4:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596532.1596536",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nahapetian:2009:AAS,
  author =       "Ani Nahapetian and Philip Brisk and Soheil Ghiasi and
                 Majid Sarrafzadeh",
  title =        "An approximation algorithm for scheduling on
                 heterogeneous reconfigurable resources",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "5:1--5:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596532.1596537",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Patterson:2009:SMB,
  author =       "C. Patterson and P. Athanas and M. Shelburne and J.
                 Bowen and J. Sur{\'\i}s and T. Dunham and J. Rice",
  title =        "Slotless module-based reconfiguration of embedded
                 {FPGAs}",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "6:1--6:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596532.1596538",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lloyd:2009:PSN,
  author =       "Scott Lloyd and Quinn Snell",
  title =        "A packet-switched network architecture for
                 reconfigurable computing",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "7:1--7:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596532.1596539",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lubbers:2009:RMP,
  author =       "Enno L{\"u}bbers and Marco Platzner",
  title =        "{ReconOS}: {Multithreaded} programming for
                 reconfigurable computers",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "8:1--8:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596532.1596540",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2009:SFB,
  author =       "Jian Huang and Matthew Parris and Jooheung Lee and
                 Ronald F. DeMara",
  title =        "Scalable {FPGA}-based architecture for {DCT}
                 computation using dynamic partial reconfiguration",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "9:1--9:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596532.1596541",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Plaks:2009:GECb,
  author =       "Toomas P. Plaks and Neil Bergmann and Bernard
                 Pottier",
  title =        "Guest editorial {CAPA'08} configurable computing:
                 {Configuring} algorithms, processes, and architecture
                 issue {II}: {Configuring} hardware architecture",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "10:1--10:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596543.1596544",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Alle:2009:RRR,
  author =       "Mythri Alle and Keshavan Varadarajan and Alexander
                 Fell and Ramesh Reddy C. and Nimmy Joseph and Saptarsi
                 Das and Prasenjit Biswas and Jugantor Chetia and Adarsh
                 Rao and S. K. Nandy and Ranjani Narayan",
  title =        "{REDEFINE}: {Runtime} reconfigurable polymorphic
                 {ASIC}",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "11:1--11:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596543.1596545",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Banerjee:2009:FPU,
  author =       "Pritha Banerjee and Susmita Sur-Kolay and Arijit
                 Bishnu and Sandip Das and Subhas C. Nandy and Subhasis
                 Bhattacharjee",
  title =        "{FPGA} placement using space-filling curves: {Theory}
                 meets practice",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "12:1--12:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1596543.1596546",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Beckett:2009:PSM,
  author =       "Paul Beckett",
  title =        "Power scalability in a mesh-connected reconfigurable
                 architecture",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "13:1--13:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhao:2009:STT,
  author =       "Weisheng Zhao and Eric Belhaire and Claude Chappert
                 and Pascale Mazoyer",
  title =        "Spin transfer torque {(STT)-MRAM--based} runtime
                 reconfiguration {FPGA} circuit",
  journal =      j-TECS,
  volume =       "9",
  number =       "2",
  pages =        "14:1--14:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:00 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2010:CPV,
  author =       "Hyung Sun Lee and Byung Kook Kim",
  title =        "Coscheduling of processor voltage and control task
                 period for energy-efficient control systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "15:1--15:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Reddy:2010:CPE,
  author =       "Rakesh Reddy and Peter Petrov",
  title =        "Cache partitioning for energy-efficient and
                 interference-free embedded multitasking",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "16:1--16:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Geelen:2010:MES,
  author =       "Bert Geelen and Vissarion Ferentinos and Francky
                 Catthoor and Gauthier Lafruit and Diederik Verkest and
                 Rudy Lauwereins and Thanos Stouraitis",
  title =        "Modeling and exploiting spatial locality trade-offs in
                 wavelet-based applications under varying resource
                 requirements",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "17:1--17:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bueno:2010:ORA,
  author =       "David Bueno and Chris Conger and Alan D. George",
  title =        "Optimizing {RapidIO} architectures for onboard
                 processing",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "18:1--18:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Inoue:2010:RSC,
  author =       "Hiroaki Inoue and Junji Sakai and Masato Edahiro",
  title =        "A robust seamless communication architecture for
                 next-generation mobile terminals on multi-{CPU}
                 {SoCs}",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "19:1--19:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Manzanares:2010:CER,
  author =       "Adam Manzanares and Xiaojun Ruan and Shu Yin and Xiao
                 Qin and Adam Roth and Mais Najim",
  title =        "Conserving energy in real-time storage systems with
                 {I/O} burstiness",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "20:1--20:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Courbot:2010:EBD,
  author =       "Alexandre Courbot and Gilles Grimaud and Jean-Jacques
                 Vandewalle",
  title =        "Efficient off-board deployment and customization of
                 virtual machine-based embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "21:1--21:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Xue:2010:IRP,
  author =       "Chun Jason Xue and Jingtong Hu and Zili Shao and Edwin
                 Sha",
  title =        "Iterational retiming with partitioning: {Loop}
                 scheduling with complete memory latency hiding",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "22:1--22:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cho:2010:LFS,
  author =       "Hyeonjoong Cho and Binoy Ravindran and E. Douglas
                 Jensen",
  title =        "Lock-free synchronization for dynamic embedded
                 real-time systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "23:1--23:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Musoll:2010:CEL,
  author =       "Enric Musoll",
  title =        "A cost-effective load-balancing policy for tile-based,
                 massive multi-core packet processors",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "24:1--24:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Guang:2010:HAM,
  author =       "Liang Guang and Ethiopia Nigussie and Pekka Rantala
                 and Jouni Isoaho and Hannu Tenhunen",
  title =        "Hierarchical agent monitoring design approach towards
                 self-aware parallel systems-on-chip",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "25:1--25:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{McLoughlin:2010:RTR,
  author =       "Ian Vince McLoughlin and Timo Rolf Bretschneider",
  title =        "Reliability through redundant parallelism for
                 micro-satellite computing",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "26:1--26:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yang:2010:OMC,
  author =       "Lei Yang and Robert P. Dick and Haris Lekatsas and
                 Srimat Chakradhar",
  title =        "Online memory compression for embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "27:1--27:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cesana:2010:MBM,
  author =       "Ulpian Cesana and Zhen He",
  title =        "Multi-buffer manager: {Energy-efficient} buffer
                 manager for databases on flash memory",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "28:1--28:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tichy:2010:GAF,
  author =       "Milan Tichy and Jan Schier and David Gregg",
  title =        "{GSFAP} adaptive filtering using log arithmetic for
                 resource-constrained embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "29:1--29:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yang:2010:HPO,
  author =       "Lei Yang and Robert P. Dick and Haris Lekatsas and
                 Srimat Chakradhar",
  title =        "High-performance operating system controlled online
                 memory compression",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "30:1--30:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721696",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Online memory compression is a technology that
                 increases the amount of memory available to
                 applications by dynamically compressing and
                 decompressing their working datasets on demand. It has
                 proven extremely useful in embedded systems with tight
                 physical RAM constraints. The technology can be used to
                 increase functionality, reduce size, and reduce cost,
                 without modifying applications or hardware. This
                 article presents a new software-based online memory
                 compression algorithm for embedded systems. In
                 comparison with the best algorithms used in online
                 memory compression, our new algorithm has a competitive
                 compression ratio but is twice as fast. In addition, we
                 describe several practical problems encountered in
                 developing an online memory compression infrastructure
                 and present solutions. We present a method of
                 adaptively managing the uncompressed and compressed
                 memory regions during application execution. This
                 memory management scheme adapts to the predicted memory
                 requirements of applications. It permits efficient
                 compression for a wide range of applications. We have
                 evaluated our techniques on a portable embedded device
                 and have found that the memory available to
                 applications can be increased by $ 2.5 \times $ with
                 negligible performance and power consumption penalties,
                 and with no changes to hardware or applications. Our
                 techniques allow existing applications to execute with
                 less physical memory. They also allow applications with
                 larger working datasets to execute on unchanged
                 embedded system hardware, thereby increasing
                 functionality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "compression; Embedded system; memory",
}

@Article{Wu:2010:SAF,
  author =       "Chin-Hsien Wu",
  title =        "A self-adjusting flash translation layer for
                 resource-limited embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "31:1--31:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721697",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The capacity of flash memory storage systems has been
                 growing at a speed similar to many other storage
                 systems. In order to properly manage the product cost,
                 vendors face serious challenges in resource-limited
                 embedded systems. In this article, a self-adjusting
                 flash translation layer is proposed with low memory
                 requirements. The objective of the design is to provide
                 efficient address mapping and low garbage collection
                 overhead, while controlling main memory usage of the
                 flash translation layer. The capability of the design
                 is evaluated over realistic workloads and benchmarks.
                 System performance is also guaranteed under low memory
                 requirements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "embedded systems; Flash memory; flash translation
                 layer; storage systems",
}

@Article{Irturk:2010:GAG,
  author =       "Ali Irturk and Bridget Benson and Shahnam Mirzaei and
                 Ryan Kastner",
  title =        "{GUSTO}: an automatic generation and optimization tool
                 for matrix inversion architectures",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "32:1--32:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721698",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Matrix inversion is a common function found in many
                 algorithms used in wireless communication systems. As
                 FPGAs become an increasingly attractive platform for
                 wireless communication, it is important to understand
                 the trade-offs in designing a matrix inversion core on
                 an FPGA. This article describes a matrix inversion core
                 generator tool, GUSTO, that we developed to ease the
                 design space exploration across different matrix
                 inversion architectures. GUSTO is the first tool of its
                 kind to provide automatic generation of a variety of
                 general-purpose matrix inversion architectures with
                 different parameterization options. GUSTO also provides
                 an optimized application-specific architecture with an
                 average of 59\% area decrease and 3X throughput
                 increase over its general-purpose architecture. The
                 optimized architectures generated by GUSTO provide
                 comparable results to published matrix inversion
                 architecture implementations, but offer the advantage
                 of providing the designer the ability to study the
                 trade-offs between architectures with different design
                 parameters.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "design space exploration; Field programmable gate
                 arrays (FPGAs); matrix inversion",
}

@Article{Yu:2010:FSB,
  author =       "Yue Yu and Shangping Ren and Ophir Frieder",
  title =        "Feasibility of semiring-based timing constraints",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721699",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Real-time and embedded applications often involve
                 different types of timing constraints, such as
                 precedence constraints and real-time constraints. As
                 real-time and embedded applications further advance,
                 new timing constraint types are emerging as well.
                 Recent research on interval-based timing constraints is
                 an example. Thus, it is important to have a uniformed
                 timing constraint representation so that a generalized
                 approach can be developed to analyze the variant
                 constraint types.\par

                 A semiring-based timing constraint model is introduced
                 to generalize the representations of different
                 constraint types. Under this model, we develop an
                 algorithm to check the satisfaction feasibility for a
                 given set of semiring-based timing constraints. This
                 algorithm provides better performance in the average
                 case as compared to applying the Bellman--Ford algorithm
                 directly on the constraint set.\par

                 In addition, for a set of feasible semiring-based
                 timing constraints, event occurrence points that
                 satisfy the constraint set form a (hyperdimension)
                 feasible region. For the given two sets of timing
                 constraints, we develop a necessary and sufficient
                 condition to testify whether the two constraint sets'
                 feasible regions have an inclusion relation. If one
                 feasible region is included in the other, we know that
                 the real-time event occurrences that satisfy the
                 included constraint set will necessarily satisfy the
                 including set.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "timing constraint feasibility analysis; Timing
                 constraints",
}

@Article{Tahaee:2010:PAP,
  author =       "Seyed-Abdoreza Tahaee and Amir Hossein Jahangir",
  title =        "A polynomial algorithm for partitioning problems",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "34:1--34:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721700",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article takes a theoretical approach to focus on
                 the algorithmic properties of hardware/software
                 partitioning. It proposes a method with polynomial
                 complexity to find the global optimum of an NP-hard
                 model partitioning problem for 75\% of occurrences
                 under some practical conditions. The global optimum is
                 approached with a lower bound distance for the
                 remaining 25\%. Furthermore, this approach ensures
                 finding the 2-approximate of the global optimum
                 partition in 97\% of instances where technical
                 assumptions exist. The strategy is based on
                 intelligently changing the parameters of the polynomial
                 model of the partitioning problem to force it to
                 produce (or approach) the exact solution to the NP-hard
                 model.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "hardware/software codesign; maximum flow minimum cut
                 problem; NP-hard problems; Partitioning problem",
}

@Article{Peng:2010:OWZ,
  author =       "Huan-Kai Peng and Youn-Long Lin",
  title =        "An optimal warning-zone-length assignment algorithm
                 for real-time and multiple-{QoS} on-chip bus
                 arbitration",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "35:1--35:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721701",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In an advanced System-on-Chip (SoC) for real-time
                 applications, the arbiter of its on-chip communication
                 subsystem needs to support multiple QoS criteria while
                 providing a hard real-time guarantee. To fulfill both
                 objectives, the arbitration algorithm must dynamically
                 switch between NonReal-Time (NRT) and Real-Time (RT)
                 modes such that use of the RT mode is minimized to best
                 accommodate the overall QoS criteria. In this article,
                 we define a model for this problem, and propose optimal
                 solutions to its associated problems with static and
                 dynamic warning-zone-length assignment. Compared with
                 previous works, the proposed approach enables a bus
                 arbiter to use much less RT mode in providing a
                 Real-Time (RT) guarantee and, therefore, gives the
                 arbiter more opportunity to employ non-RT modes to
                 achieve better overall QoS. Experimental results show
                 that the proposed approach reduces RT mode usage by as
                 much as 37.1\%. Moreover, that reduction in RT mode
                 usage helps cut the execution time by 27.0\% when
                 applying our approach to an industrial DRAM controller.
                 Another case study on an AMBA-compliant
                 ultra-high-resolution H.264 decoder IP shows that the
                 proposed approach reduces RT mode usage by 26.4\%,
                 which leads to an average reduction of 10.4\% in
                 decoding time. Finally, when implementing a 16 master
                 arbiter, it costs only 6.9K and 9.5K gates of overhead
                 using the proposed static and dynamic approach,
                 respectively. Therefore, the proposed approach is
                 suitable for real-time SoC applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "on-chip communication; QoS; real-time scheduling;
                 System-on-Chip",
}

@Article{Schlich:2010:MCS,
  author =       "Bastian Schlich",
  title =        "Model checking of software for microcontrollers",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "36:1--36:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721702",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The interest of industries in model checking software
                 for microcontrollers is increasing. However, there are
                 currently no appropriate tools that can be applied by
                 embedded systems developers for the direct verification
                 of software for microcontrollers without the need for
                 manual modeling. This article describes a new approach
                 to model checking software for microcontrollers, which
                 verifies the assembly code of the software. The state
                 space is built using a tailored simulator, which
                 abstracts from time, handles nondeterminism, and
                 creates an overapproximation of the behavior shown by
                 the real microcontroller. Within this simulator, we
                 apply abstraction techniques to tackle the
                 state-explosion problem. In our approach, we combine
                 different formal methods, namely, model checking,
                 static analysis, and abstract interpretation. We also
                 combine explicit and symbolic model checking
                 techniques. This article presents a case study using
                 several programs to demonstrate the efficiency of the
                 applied abstraction techniques and to show the
                 applicability of this approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Assembly code; formal verification; microcontroller;
                 model checking; static analysis",
}

@Article{Bombieri:2010:SND,
  author =       "Nicola Bombieri and Franco Fummi and Davide Quaglia",
  title =        "System\slash network design-space exploration based on
                 {TLM} for networked embedded systems",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "37:1--37:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721703",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a methodology for the design of
                 Networked Embedded Systems (NESs), which extends
                 Transaction Level Modeling (TLM) to perform
                 system/network design-space exploration. As a result, a
                 new design dimension is added to the traditional TLM
                 refinement process to represent network configuration
                 alternatives. Each network configuration can be used to
                 drive both architecture exploration and system
                 validation after each refinement step. A system/network
                 simulation taxonomy is investigated aiming at precisely
                 identifying the role of cosimulation in system/network
                 design-space exploration. Furthermore, a general
                 criterion to map functionalities to system and network
                 models is presented. As a case study, the proposed
                 methodology is applied to the design of a Voice-over-IP
                 client.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "networked embedded systems; Transaction level
                 modeling",
}

@Article{Lin:2010:SSA,
  author =       "Chang Hong Lin and Marilyn Wolf and Xenofon Koutsoukos
                 and Sandeep Neema and Janos Sztipanovits",
  title =        "System and software architectures of distributed smart
                 cameras",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "38:1--38:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721704",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we describe a distributed,
                 peer-to-peer gesture recognition system along with a
                 software architecture modeling technique and authority
                 control protocol for ubiquitous cameras. This system
                 performs gesture recognition in real time by combining
                 imagery from multiple cameras without using a central
                 server. We propose a system architecture that uses a
                 network of inexpensive cameras to perform in-network
                 video processing. A methodology for transforming
                 well-designed single-node algorithm to distributed
                 system is also proposed. Applications for ubiquitous
                 cameras can be modeled as the composition of a
                 finite-state machine of the system, functional
                 services, and middleware. A service-oriented software
                 architecture is proposed to dynamically reconfigure
                 services when system state changes. By exchanging data
                 and control messages between neighboring sensors, each
                 node can maintain broader view of the environment with
                 integrated video-processing results. Our prototype
                 system is built on Windows machines, and uses standard
                 video cameras as sensors and local network as a
                 communication channel.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Distributed cameras; smart camera; software
                 architecture",
}

@Article{Zhou:2010:MMS,
  author =       "Gang Zhou and Yafeng Wu and Ting Yan and Tian He and
                 Chengdu Huang and John A. Stankovic and Tarek F.
                 Abdelzaher",
  title =        "A multifrequency {MAC} specially designed for wireless
                 sensor network applications",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "39:1--39:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721705",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multifrequency media access control has been well
                 understood in general wireless ad hoc networks, while
                 in wireless sensor networks, researchers still focus on
                 single frequency solutions. In wireless sensor
                 networks, each device is typically equipped with a
                 single radio transceiver and applications adopt much
                 smaller packet sizes compared to those in general
                 wireless ad hoc networks. Hence, the multifrequency MAC
                 protocols proposed for general wireless ad hoc networks
                 are not suitable for wireless sensor network
                 applications, which we further demonstrate through our
                 simulation experiments. In this article, we propose
                 MMSN, which takes advantage of multifrequency
                 availability while, at the same time, takes into
                 consideration the restrictions of wireless sensor
                 networks. In MMSN, four frequency assignment options
                 are provided to meet different application
                 requirements. A scalable media access is designed with
                 efficient broadcast support. Also, an optimal
                 nonuniform back-off algorithm is derived and its
                 lightweight approximation is implemented in MMSN, which
                 significantly reduces congestion in the time
                 synchronized media access design. Through extensive
                 experiments, MMSN exhibits the prominent ability to
                 utilize parallel transmissions among neighboring nodes.
                 When multiple physical frequencies are available, it
                 also achieves increased energy efficiency,
                 demonstrating the ability to work against radio
                 interference and the tolerance to a wide range of
                 measured time synchronization errors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "media access control; multi-channel; radio
                 interference; time synchronization; Wireless sensor
                 networks",
}

@Article{Jung:2010:SFS,
  author =       "Dawoon Jung and Jeong-Uk Kang and Heeseung Jo and
                 Jin-Soo Kim and Joonwon Lee",
  title =        "Superblock {FTL}: a superblock-based {Flash
                 Translation Layer} with a hybrid address translation
                 scheme",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "40:1--40:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721706",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In NAND flash-based storage systems, an intermediate
                 software layer called a Flash Translation Layer (FTL)
                 is usually employed to hide the erase-before-write
                 characteristics of NAND flash memory. We propose a
                 novel superblock-based FTL scheme, which combines a set
                 of adjacent logical blocks into a superblock. In the
                 proposed Superblock FTL, superblocks are mapped at
                 coarse granularity, while pages inside the superblock
                 are mapped freely at fine granularity to any location
                 in several physical blocks. To reduce extra storage and
                 flash memory operations, the fine-grain mapping
                 information is stored in the spare area of NAND flash
                 memory. This hybrid address translation scheme has the
                 flexibility provided by fine-grain address translation,
                 while reducing the memory overhead to the level of
                 coarse-grain address translation. Our experimental
                 results show that the proposed FTL scheme significantly
                 outperforms previous block-mapped FTL schemes with
                 roughly the same memory overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "FTL; hybrid address translation; NAND flash memory;
                 storage system",
}

@Article{Klues:2010:LLD,
  author =       "Kevin Klues and Guoliang Xing and Chenyang Lu",
  title =        "Link layer driver architecture for unified radio power
                 management in wireless sensor networks",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "41:1--41:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721707",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Wireless Sensor Networks (WSNs) represent a new
                 generation of networked embedded systems that must
                 achieve long lifetimes on scarce amounts of energy.
                 Since radio communication accounts for the primary
                 source of power drain in these networks, a large number
                 of different radio power management protocols have been
                 proposed. However, the lack of operating system support
                 for flexibly integrating them with a diverse set of
                 applications and network platforms has made them
                 difficult to use. This article focuses on providing
                 link layer support toward realizing a unified power
                 management architecture (UPMA) for WSNs. In contrast to
                 existing monolithic approaches, we provide (i) a set of
                 standard interfaces that separate link layer power
                 management protocols from common MAC level
                 functionality, (ii) an architectural framework that
                 allows applications to easily swap out different
                 power-management protocols depending on its needs, and
                 (iii) a mechanism for coordinating multiple
                 applications with different power management
                 requirements. We have implemented our approach on both
                 the Mica2 and Telosb radio drivers in TinyOS-2.0, the
                 second generation of the de facto standard operating
                 system for WSNs. Microbenchmark results show that our
                 approach can coordinate the power-management
                 requirements of multiple applications in a platform
                 independent fashion while incurring negligible
                 overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "architecture; framework; radio power management;
                 Wireless sensor networks",
}

@Article{Lee:2010:IHM,
  author =       "Jupyung Lee and Kyu Ho Park",
  title =        "Interrupt handler migration and direct interrupt
                 scheduling for rapid scheduling of interrupt-driven
                 tasks",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "42:1--42:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721708",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we propose two techniques that aim to
                 minimize the scheduling latency of high-priority
                 interrupt-driven tasks, named the Interrupt Handler
                 Migration (IHM) and Direct Interrupt Scheduling (DIS).
                 The IHM allows the interrupt handler to be migrated
                 from the interrupt handler thread to the corresponding
                 target process so that additional context switch can be
                 avoided and the cache hit ratio with respect to the
                 data generated by the interrupt handler can be
                 improved. In addition, the DIS allows the shortest path
                 reserved for urgent interrupt-process pairs to be laid
                 between the interrupt arrival and target process by
                 dividing a series of interrupt-driven operations into
                 nondeferrable and deferrable operations. Both the IHM
                 and DIS can be combined in a natural way and can
                 operate concurrently. These techniques can be applied
                 to all kinds of interrupt handlers with no modification
                 to them. The proposed techniques not only reduce the
                 scheduling latency, but also resolve the
                 interrupt-driven priority inversion problem.\par

                 We implemented a prototype in the Linux 2.6.19 kernel
                 after adding real-time patches. Experimental results
                 show that the scheduling latency is significantly
                 reduced by up to 84.2\% when both techniques are
                 applied together. When the Linux OS runs on an
                 ARM-based embedded CPU running at 200 MHz, the
                 scheduling latency can become as low as 30 $ \mu $s,
                 which is much closer to the hardware-specific
                 limitations. By lowering the scheduling latency, the
                 limited CPU cycles can be consumed more for user-level
                 processes and less for system-level tasks, such as
                 interrupt handling and scheduling.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "interrupt handling; latency; Linux; Real-time
                 operating system; responsiveness; scheduling",
}

@Article{Tan:2010:MSE,
  author =       "Chiu C. Tan and Bo Sheng and Haodong Wang and Qun Li",
  title =        "{Microsearch}: a search engine for embedded devices
                 used in pervasive computing",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "43:1--43:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1721695.1721709",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present Microsearch, a search
                 system suitable for embedded devices used in ubiquitous
                 computing environments. Akin to a desktop search
                 engine, Microsearch indexes the information inside a
                 small device, and accurately resolves a user's queries.
                 Given the limited hardware, conventional search engine
                 design and algorithms cannot be used. We adopt
                 Information Retrieval (IR) techniques for query
                 resolution, and proposed a new space-efficient top-$k$
                 query resolution algorithm. A theoretical model of
                 Microsearch is given to better understand the
                 trade-offs in design parameters. Evaluation is done via
                 actual implementation on off-the-shelf hardware.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Embedded search engine; information retrieval;
                 pervasive computing",
}

@Article{Higuera-Toledano:2010:ISI,
  author =       "M. Teresa Higuera-Toledano and Doug Locke and Angelo
                 Corsaro",
  title =        "Introduction to special issue on {Java} technologies
                 for real-time and embedded systems",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "1:1--1:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814540",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{DosSantos:2010:MPB,
  author =       "Osmar Marchi {Dos Santos} and Andy Wellings",
  title =        "Measuring and policing blocking times in real-time
                 systems",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "2:1--2:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814541",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In real-time systems, the execution-time overrun of a
                 thread may lead to a deadline being missed by the
                 thread or even other threads in the system. From a
                 fault tolerance perspective, both execution time
                 overruns and deadline misses can be considered timing
                 errors that could potentially cause a failure in the
                 system's ability to deliver its services in a timely
                 manner. In this context, the ideal is to detect the
                 error in the system as soon as possible, so that the
                 propagation of the error can be limited and error
                 recovery strategies can take place with more accurate
                 information. The run-time support mechanism usually
                 deployed for monitoring the timing requirements of
                 real-time systems is based on deadline monitoring, that
                 is, the system calls specific application code whenever
                 a deadline is violated. Recognizing that deadline
                 monitoring may not be enough for providing an adequate
                 level of fault tolerance for timing errors, major
                 real-time programming standards, like Ada, POSIX and
                 the Real-Time Specification for Java (RTSJ), have
                 proposed different mechanisms for monitoring the
                 execution time of threads. Nevertheless, in order to
                 provide a complete fault tolerance approach for timing
                 errors, the potential blocking time of threads also has
                 to be monitored. In this article, we propose mechanisms
                 for measuring and policing the blocking time of threads
                 in the context of both {\em basic priority
                 inheritance\/} and {\em priority ceiling protocols}.
                 The notion of {\em blocking-time clocks and timers\/}
                 for the POSIX standard is proposed, implemented and
                 evaluated in the open-source real-time operating system
                 MaRTE OS. Also, a {\em blocking time monitoring
                 model\/} for measuring and policing blocking times in
                 the RTSJ framework is specified. This model is
                 implemented and evaluated in the (RTSJ-compliant)
                 open-source middleware jRate, running on top of MaRTE
                 OS.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "error detection; POSIX standard; Real-time
                 specification for Java; timing errors",
}

@Article{Zerzelidis:2010:FFS,
  author =       "Alexandros Zerzelidis and Andy Wellings",
  title =        "A framework for flexible scheduling in the {RTSJ}",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "3:1--3:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814542",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a viable solution to introducing
                 flexible scheduling in the Real-Time Specification for
                 Java (RTSJ), in the form of a flexible scheduling
                 framework. The framework allows the concurrent use of
                 multiple application-defined scheduling policies, each
                 scheduling a subset of the total set of threads.
                 Moreover, all threads, regardless of the policy under
                 which they are scheduled, are permitted to share common
                 resources. Thus, the framework can accommodate a
                 variety of interworking applications (soft, firm, and
                 hard) running under the RTSJ. The proposed approach is
                 a two-level scheduling framework, where the first level
                 is the RTSJ priority scheduler and the second level is
                 under application control. This article describes the
                 framework's protocol, examines the different types of
                 scheduling policies that can be supported, and
                 evaluates the proposed framework by measuring its
                 execution cost. A description of an application-defined
                 Earliest-Deadline-First (EDF) scheduler illustrates how
                 the interface can be used. Minimum backward-compatible
                 changes to the RTSJ specification are discussed to
                 motivate the required interface. The only assumptions
                 made about the underlying real-time operating system is
                 that it supports preemptive priority-based dispatching
                 of threads and that changes to priorities have
                 immediate effect.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "flexible scheduling; RTSJ; Scheduling framework",
}

@Article{Spring:2010:RAI,
  author =       "Jesper Honig Spring and Filip Pizlo and Jean Privat
                 and Rachid Guerraoui and Jan Vitek",
  title =        "{Reflexes}: {Abstractions} for integrating highly
                 responsive tasks into {Java} applications",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "4:1--4:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814543",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/csharp.bib;
                 https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Achieving submillisecond response times in a managed
                 language environment such as Java or C\# requires
                 overcoming significant challenges. In this article, we
                 propose Reflexes, a programming model and runtime
                 system infrastructure that lets developers seamlessly
                 mix highly responsive tasks and timing-oblivious Java
                 applications. Thus enabling gradual addition of
                 real-time features, to a non-real-time application
                 without having to resort to recoding the real-time
                 parts in a different language such as C or Ada.
                 Experiments with the Reflex prototype implementation
                 show that it is possible to run a real-time task with a
                 period of 45 $ \mu $s with an accuracy of 99.996\%
                 (only 0.001\% worse than the corresponding C
                 implementation) in the presence of garbage collection
                 and heavy load ordinary Java threads.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Java virtual machine; memory management; Real-time
                 systems",
}

@Article{Kim:2010:EAE,
  author =       "Minseong Kim and Andy Wellings",
  title =        "Efficient asynchronous event handling in the real-time
                 specification for {Java}",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "5:1--5:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814544",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The Real-Time Specification for Java (RTSJ) is
                 becoming mature. It has been implemented, formed the
                 basis for research and used in serious applications.
                 Some strengths and weaknesses are emerging. One of the
                 areas that requires further elaboration is asynchronous
                 event handling (AEH). The primary goal for handlers in
                 the RTSJ is to have a lightweight concurrency
                 mechanism. Some implementations will, however, simply
                 map a handler to a real-time thread and this results in
                 undermining the original motivations and introduces
                 performance penalties. However it is generally unclear
                 how to map handlers to real-time threads effectively.
                 Also the support for nonblocking handlers in the RTSJ
                 is criticized as lacking in configurability as
                 implementations are unable to take advantage of them.
                 This article, therefore, examines the AEH techniques
                 used in some popular RTSJ implementations and proposes
                 two efficient AEH models for the RTSJ. We then define
                 formal models of the RTSJ AEH implementations using the
                 automata formalism provided by the UPPAAL model
                 checking tool. Using the automata models, their
                 properties are explored and verified. In the proposed
                 models, blocking and nonblocking handlers are serviced
                 by different algorithms. In this way, it is possible to
                 assign a real-time thread to a handler at the right
                 time in the right place while maintaining the fewest
                 possible threads overall and to give a certain level of
                 configurability to AEH. We also have implemented the
                 proposed models on an existing RTSJ implementation,
                 jRate and executed a set of performance tests that
                 measure their respective dispatch and multiple-handler
                 completion latencies. The results from the tests and
                 the verifications indicate that the proposed models
                 require fewer threads on average with better
                 performance than other approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "asynchronous event handling; blocking handler;
                 multiple-server switching phenomenon; nonblocking
                 handler; RTSJ",
}

@Article{Schoeberl:2010:NRT,
  author =       "Martin Schoeberl and Wolfgang Puffitsch",
  title =        "Nonblocking real-time garbage collection",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "6:1--6:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814545",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A real-time garbage collector has to fulfill two basic
                 properties: ensure that programs with bounded
                 allocation rates do not run out of memory and provide
                 short blocking times. Even for incremental garbage
                 collectors, two major sources of blocking exist,
                 namely, root scanning and heap compaction. Finding root
                 nodes of an object graph is an integral part of tracing
                 garbage collectors and cannot be circumvented. Heap
                 compaction is necessary to avoid probably unbounded
                 heap fragmentation, which in turn would lead to
                 unacceptably high memory consumption. In this article,
                 we propose solutions to both issues.\par

                 Thread stacks are local to a thread, and root scanning,
                 therefore, only needs to be atomic with respect to the
                 thread whose stack is scanned. This fact can be
                 utilized by either blocking only the thread whose stack
                 is scanned, or by delegating the responsibility for
                 root scanning to the application threads. The latter
                 solution eliminates blocking due to root scanning
                 completely. The impact of this solution on the
                 execution time of a garbage collector is shown for two
                 different variants of such a root scanning
                 algorithm.\par

                 During heap compaction, objects are copied. Copying is
                 usually performed atomically to avoid interference with
                 application threads, which could render the state of an
                 object inconsistent. Copying of large objects and
                 especially large arrays introduces long blocking times
                 that are unacceptable for real-time systems. In this
                 article, an interruptible copy unit is presented that
                 implements nonblocking object copy. The unit can be
                 interrupted after a single word move.\par

                 We evaluate a real-time garbage collector that uses the
                 proposed techniques on a Java processor. With this
                 garbage collector, it is possible to run high-priority
                 hard real-time tasks at 10 kHz parallel to the garbage
                 collection task on a 100 MHz system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Garbage collection; nonblocking copying; real-time;
                 root scanning",
}

@Article{Basanta-Val:2010:NHR,
  author =       "Pablo Basanta-Val and Marisol Garc{\'\i}a-Valls and
                 Iria Est{\'e}vez-Ayres",
  title =        "{No-Heap Remote Objects} for distributed real-time
                 {Java}",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "7:1--7:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814546",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents an approach to providing
                 real-time support for Java's Remote Method Invocation
                 (RMI) and its integration with the RTSJ memory model in
                 order to leave out garbage collection. A new construct
                 for remote objects, called {\em No-heap Remote
                 object\/} ({\em NhRo\/}), is introduced. The use of a
                 NhRo guarantees that memory required to perform a
                 remote invocation (at the server side) does not use
                 heap memory. Thus, the aim is to avoid garbage
                 collection in the remote invocation process, improving
                 predictability and memory isolation of distributed
                 Java-based real-time applications. The article presents
                 the bare model and the main programming patterns that
                 are associated with the NhRo model. Sun RMI
                 implementation has been modified to integrate the NhRo
                 model in both static and dynamic environments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "distributed real-time Java; DRTSJ; Real-time Java;
                 real-time remote objects; region-based memory
                 management; RTSJ",
}

@Article{Curley:2010:RDT,
  author =       "Edward Curley and Binoy Ravindran and Jonathan
                 Anderson and E. Douglas Jensen",
  title =        "Recovering from distributable thread failures in
                 distributed real-time {Java}",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814547",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We consider the problem of recovering from the
                 failures of distributable threads (``threads'') in
                 distributed real-time systems that operate under
                 runtime uncertainties including those on thread
                 execution times, thread arrivals, and node failure
                 occurrences. When a thread experiences a node failure,
                 the result is a broken thread having an orphan. Under a
                 termination model, the orphans must be detected and
                 aborted, and exceptions must be delivered to the
                 farthest, contiguous surviving thread segment for
                 resuming thread execution. Our application/scheduling
                 model includes the proposed distributable thread
                 programming model for the emerging Distributed
                 Real-Time Specification for Java (DRTSJ), together with
                 an exception-handler model. Threads are subject to
                 time/utility function (TUF) time constraints and an
                 utility accrual (UA) optimality criterion. A key
                 underpinning of the TUF/UA scheduling paradigm is the
                 notion of ``best-effort'' where higher importance
                 threads are always favored over lower importance ones,
                 irrespective of thread urgency as specified by their
                 time constraints. We present a thread scheduling
                 algorithm called HUA and a thread integrity protocol
                 called TPR. We show that HUA and TPR bound the orphan
                 cleanup and recovery time with bounded loss of the
                 best-effort property. Our implementation experience for
                 HUA/TPR in the Reference Implementation of the proposed
                 programming model for the DRTSJ demonstrates the
                 algorithm/protocol's effectiveness.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "distributable thread; Distributed; distributed
                 scheduling; Java; real-time; thread integrity",
}

@Article{Pitter:2010:RTJ,
  author =       "Christof Pitter and Martin Schoeberl",
  title =        "A real-time {Java} chip-multiprocessor",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814548",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Chip-multiprocessors are an emerging trend for
                 embedded systems. In this article, we introduce a
                 real-time Java multiprocessor called JopCMP. It is a
                 symmetric shared-memory multiprocessor, and consists of
                 up to eight Java Optimized Processor (JOP) cores, an
                 arbitration control device, and a shared memory. All
                 components are interconnected via a system on chip bus.
                 The arbiter synchronizes the access of multiple CPUs to
                 the shared main memory. In this article, three
                 different arbitration policies are presented,
                 evaluated, and compared with respect to their real-time
                 and average-case performance: a fixed priority, a
                 fair-based, and a time-sliced arbiter.\par

                 Tasks running on different CPUs of a
                 chip-multiprocessor (CMP) influence each others'
                 execution times when accessing a shared memory.
                 Therefore, the system needs an arbiter that is able to
                 limit the worst-case execution time of a task running
                 on a CPU, even though tasks executing simultaneously on
                 other CPUs access the main memory. Our research shows
                 that timing analysis is in fact possible for
                 homogeneous multiprocessor systems with a shared
                 memory. The timing analysis of tasks, executing on the
                 CMP using time-sliced memory arbitration, leads to
                 viable worst-case execution time bounds.\par

                 The time-sliced arbiter divides the memory access time
                 into equal time slots, one time slot for each CPU. This
                 memory arbitration scheme allows for a calculation of
                 upper bounds of Java application worst-case execution
                 times, depending on the number of CPUs, the time slot
                 size, and the memory access time. Examples of
                 worst-case execution time calculation are presented,
                 and the analyzed results of a real-world application
                 task are compared to measured execution time results.
                 Finally, we evaluate the tradeoffs when using a
                 time-predictable solution compared to using
                 average-case optimized chip-multiprocessors, applying
                 three different benchmarks. These experiments are
                 carried out by executing the programs on the CMP
                 prototype.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Java processor; multiprocessor; Real-time system;
                 shared memory; worst-case execution time",
}

@Article{Kaiser:2010:ISI,
  author =       "William Kaiser and Majid Sarrafzadeh",
  title =        "Introduction to special issue on wireless health",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "10:1--10:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814549",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ko:2010:MME,
  author =       "Jeonggil Ko and Jong Hyun Lim and Yin Chen and
                 R{\u{a}}zvan Mus{\u{a}}loiu-E. and Andreas Terzis and
                 Gerald M. Masson and Tia Gao and Walt Destler and Leo
                 Selavo and Richard P. Dutton",
  title =        "{MEDiSN}: {Medical} emergency detection in sensor
                 networks",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "11:1--11:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814550",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Staff shortages and an increasingly aging population
                 are straining the ability of emergency departments to
                 provide high quality care. At the same time, there is a
                 growing concern about hospitals' ability to provide
                 effective care during disaster events. For these
                 reasons, tools that automate patient monitoring have
                 the potential to greatly improve efficiency and quality
                 of health care. Towards this goal, we have developed
                 {\em MEDiSN}, a wireless sensor network for monitoring
                 patients' physiological data in hospitals and during
                 disaster events. MEDiSN comprises {\em Physiological
                 Monitors\/} (PMs), which are custom-built, patient-worn
                 motes that sample, encrypt, and sign physiological data
                 and {\em Relay Points\/} (RPs) that self-organize into
                 a multi-hop wireless backbone for carrying
                 physiological data. Moreover, MEDiSN includes a
                 back-end server that persistently stores medical data
                 and presents them to authenticated GUI clients. The
                 combination of MEDiSN's two-tier architecture and
                 optimized rate control protocols allows it to address
                 the compound challenge of reliably delivering large
                 volumes of data while meeting the application's QoS
                 requirements. Results from extensive simulations,
                 testbed experiments, and multiple pilot hospital
                 deployments show that MEDiSN can scale from tens to at
                 least five hundred PMs, effectively protect application
                 packets from congestive and corruptive losses, and
                 deliver medically actionable data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Medical sensor networks; wireless physiological
                 monitoring",
}

@Article{Coronato:2010:FSW,
  author =       "Antonio Coronato and Giuseppe {De Pietro}",
  title =        "Formal specification of wireless and pervasive
                 healthcare applications",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "12:1--12:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814551",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Wireless and pervasive healthcare applications
                 typically present critical requirements from the point
                 of view of functional correctness, reliability,
                 availability, security, and safety. In contrast to the
                 case of classic safety critical applications, the
                 behavior of wireless and pervasive applications is
                 affected by the movements and location of users and
                 resources.\par

                 This article presents a methodology to formally express
                 requirements in safety critical wireless and pervasive
                 healthcare applications in order to achieve a higher
                 degree of dependability. In particular, it will be
                 shown how it is possible to formalize and constrict
                 mobility characteristics by combining, and in some
                 cases extending, several formal methods. The article
                 also describes a rigorous specification process.
                 Finally, it concludes with a case study of a real
                 safety critical pervasive healthcare application that
                 is going to be deployed in a city hospital.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "Formal specification; methodologies and tools;
                 wireless and pervasive healthcare applications",
}

@Article{Waluyo:2010:MMB,
  author =       "Agustinus Borgy Waluyo and Wee-Soon Yeoh and Isaac Pek
                 and Yihan Yong and Xiang Chen",
  title =        "{MobiSense}: {Mobile} body sensor network for
                 ambulatory monitoring",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "13:1--13:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814552",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article introduces MobiSense, a novel mobile
                 health monitoring system for ambulatory patients.
                 MobiSense resides in a mobile device, communicates with
                 a set of body sensor devices attached to the wearer,
                 and processes data from these sensors. MobiSense is
                 able to detect body postures such as lying, sitting,
                 and standing, and walking speed, by utilizing our
                 rule-based heuristic activity classification scheme
                 based on the extended Kalman (EK) Filtering algorithm.
                 Furthermore, the proposed system is capable of
                 controlling each of the sensor devices, and performing
                 resource reconfiguration and management schemes (sensor
                 sleep/wake-up mode). The architecture of MobiSense is
                 highlighted and discussed in depth. The system has been
                 implemented, and its prototype is showcased. We have
                 also carried out rigorous performance measurements of
                 the system including real-time and query latency as
                 well as the power consumption of the sensor nodes. The
                 accuracy of our activity classifier scheme has been
                 evaluated by involving several human subjects, and we
                 found promising results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "ambulatory patient monitoring; pervasive healthcare;
                 wireless body sensor network; Wireless health system",
}

@Article{Quwaider:2010:TPA,
  author =       "Muhannad Quwaider and Jayanthi Rao and Subir Biswas",
  title =        "Transmission power assignment with postural position
                 inference for on-body wireless communication links",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "14:1--14:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1814539.1814553",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a novel transmission power
                 assignment mechanism for on-body wireless links formed
                 between severely energy-constrained wearable and
                 implanted sensors. The key idea is to develop a
                 measurement-based framework in which the postural
                 position as it pertains to a given wireless link is
                 first inferred based on the measured RF signal strength
                 and packet drops. Then optimal power assignment is done
                 by fitting those measurement results into a model
                 describing the relationship between the assigned power
                 and the resulting signal strength. A closed loop power
                 control mechanism is then added for iterative
                 convergence to the optimal power level as a response to
                 both intra-and-inter posture body movements. This
                 provides a practical paradigm for on-body power
                 assignment, which cannot leverage the existing
                 mechanisms in the literature that rely on localization,
                 which is not realistic for on-body sensors. Extensive
                 experimental results are provided to demonstrate the
                 model building and algorithm performance on a prototype
                 body area network. The proposed mechanism has also been
                 compared with a number of other closed loop mechanisms
                 and an experimental benchmark.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "adaptive power control; Body area network; link
                 quality measurement; radio link quality",
}

@Article{Basten:2010:EMD,
  author =       "Twan Basten and Rolf Ernst",
  title =        "Editorial: {Model-driven} embedded-system design",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880051",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Geilen:2010:SDS,
  author =       "Marc Geilen",
  title =        "Synchronous dataflow scenarios",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "16:1--16:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880052",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The Synchronous Dataflow (SDF) model of computation by
                 Lee and Messerschmitt has become popular for modeling
                 concurrent applications on a multiprocessor platform.
                 It is used to obtain a guaranteed, predictable
                 performance. The model, on the other hand, is quite
                 restrictive in its expressivity, making it less
                 applicable to many modern, more dynamic applications. A
                 common technique to deal with dynamic behavior is to
                 consider different scenarios in separation. This
                 analysis is, however, currently limited mainly to
                 sequential applications. In this article, we present a
                 new analysis approach that allows analysis of
                 synchronous dataflow models across different scenarios
                 of operation. The dataflow graphs corresponding to the
                 different scenarios can be completely different.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wiggers:2010:BCC,
  author =       "Maarten H. Wiggers and Marco J. G. Bekooij and Gerard
                 J. M. Smit",
  title =        "Buffer capacity computation for throughput-constrained
                 modal task graphs",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880053",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Increasingly, stream-processing applications include
                 complex control structures to better adapt to changing
                 conditions in their environment. This adaptivity often
                 results in task execution rates that are dependent on
                 the processed stream. Current approaches to compute
                 buffer capacities that are sufficient to satisfy a
                 throughput constraint have limited applicability in
                 case of data-dependent task execution rates. In this
                 article, we present a dataflow model that allows tasks
                 to have loops with an unbounded number of iterations.
                 For instances of this dataflow model, we present
                 efficient checks on their validity. Furthermore, we
                 present an efficient algorithm to compute buffer
                 capacities that are sufficient to satisfy a throughput
                 constraint.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Falk:2010:ASA,
  author =       "Joachim Falk and Christian Zebelein and Joachim
                 Keinert and Christian Haubelt and Juergen Teich and
                 Shuvra S. Bhattacharyya",
  title =        "Analysis of {SystemC} actor networks for efficient
                 synthesis",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "18:1--18:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880054",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Applications in the signal processing domain are often
                 modeled by dataflow graphs. Due to heterogeneous
                 complexity requirements, these graphs contain both
                 dynamic and static dataflow actors. In previous work,
                 we presented a generalized clustering approach for
                 these heterogeneous dataflow graphs in the presence of
                 unbounded buffers. This clustering approach allows the
                 application of static scheduling methodologies for
                 static parts of an application during embedded software
                 generation for multiprocessor systems. It
                 systematically exploits the predictability and
                 efficiency of the static dataflow model to obtain
                 latency and throughput improvements. In this article,
                 we present a generalization of this clustering
                 technique to dataflow graphs with bounded buffers,
                 therefore enabling synthesis for embedded systems
                 without dynamic memory allocation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Razavi:2010:SAB,
  author =       "Niloofar Razavi and Razieh Behjati and Hamideh Sabouri
                 and Ehsan Khamespanah and Amin Shali and Marjan
                 Sirjani",
  title =        "{Sysfier}: {Actor-based} formal verification of
                 {SystemC}",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "19:1--19:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880055",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "SystemC is a system-level modeling language that can
                 be used effectively for hardware/software co-design.
                 Since a major goal of SystemC is to enable verification
                 at higher levels of abstraction, the tendency is now
                 directing to introducing formal verification approaches
                 for SystemC. In this article, we propose an approach
                 for formal verification of SystemC designs, and provide
                 the semantics of SystemC using Labeled Transition
                 Systems (LTS) for this purpose. An actor-based
                 language, Rebeca, is used as an intermediate language.
                 SystemC designs are mapped to Rebeca models and then
                 Rebeca verification toolset is used to verify LTL and
                 CTL properties. To tackle the state-space explosion,
                 Rebeca model checkers offer some reduction policies
                 that make them appropriate for SystemC verification.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Adler:2010:CBM,
  author =       "Rasmus Adler and Ina Schaefer and Mario Trapp and Arnd
                 Poetzsch-Heffter",
  title =        "Component-based modeling and verification of dynamic
                 adaptation in safety-critical embedded systems",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "20:1--20:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880056",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Adaptation is increasingly used in the development of
                 safety-critical embedded systems, in particular to
                 reduce hardware needs and to increase availability.
                 However, composing a system from many reconfigurable
                 components can lead to a huge number of possible system
                 configurations, inducing a complexity that cannot be
                 handled during system design. To overcome this problem,
                 we propose a new component-based modeling and
                 verification method for adaptive embedded systems. The
                 component-based modeling approach facilitates
                 abstracting a composition of components to a
                 hierarchical component. In the hierarchical component,
                 the number of possible configurations of the
                 composition is reduced to a small number of
                 hierarchical configurations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Driver:2010:MES,
  author =       "Cormac Driver and Sean Reilly and {\'E}amonn Linehan
                 and Vinny Cahill and Siobh{\'a}n Clarke",
  title =        "Managing embedded systems complexity with
                 aspect-oriented model-driven engineering",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "21:1--21:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880057",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Model-driven engineering addresses issues of platform
                 heterogeneity and code quality through the use of
                 high-level system models and subsequent automatic
                 transformations. Adoption of the model-driven software
                 engineering paradigm for embedded systems necessitates
                 specification of appropriate models of often complex
                 systems. Modern embedded systems are typically composed
                 of multiple functional and nonfunctional concerns, with
                 the nonfunctional concerns (e.g., timing and
                 performance) typically affecting the design and
                 implementation of the functional concerns. The presence
                 of crosscutting concerns makes specification of
                 adequate platform-independent models a significant
                 challenge. Aspect-oriented software development is a
                 separation of concerns technique that decomposes
                 systems into distinct features with minimal overlap.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schliecker:2010:RTP,
  author =       "Simon Schliecker and Rolf Ernst",
  title =        "Real-time performance analysis of multiprocessor
                 systems with shared memory",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "22:1--22:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880058",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Predicting timing behavior is key to reliable
                 real-time system design and verification, but becomes
                 increasingly difficult for current multiprocessor
                 systems on chip. The integration of formerly separate
                 functionality into a single multicore system introduces
                 new intercore timing dependencies resulting from the
                 common use of the now shared resources. This feedback
                 of system timing on local timing makes traditional
                 performance analysis approaches inappropriate. This
                 article presents a general methodology to model the
                 shared resource traffic and consider its effect on the
                 local task execution. The aggregate busy time captures
                 the timing of multiple accesses to a shared memory far
                 better than the traditional models that focus on the
                 timing of individual events.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seo:2010:DAS,
  author =       "Euiseong Seo and Sangwon Kim and Seonyeong Park and
                 Joonwon Lee",
  title =        "Dynamic alteration schemes of real-time schedules for
                 {I/O} device energy efficiency",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "23:1--23:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880059",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many I/O devices provide multiple power states known
                 as the dynamic power management (DPM) feature. However,
                 activating from sleep state requires significant
                 transition time and this obstructs utilizing DPM in
                 nonpreemptive real-time systems. This article suggests
                 nonpreemptive real-time task scheduling schemes
                 maximizing the effectiveness of the I/O device DPM
                 support. First, we introduce a runtime schedulability
                 check algorithm for nonpreemptive real-time systems
                 that can check whether a modification from a valid
                 schedule is still valid. By using this, we suggest
                 three heuristic algorithms. The first algorithm
                 reorders the execution sequence of tasks according to
                 the similarity of their required device sets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cabodi:2010:BSF,
  author =       "Gianpiero Cabodi and Marco Murciano and Massimo
                 Violante",
  title =        "Boosting software fault injection for dependability
                 analysis of real-time embedded applications",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "24:1--24:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880060",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The design of complex embedded systems deployed in
                 safety-critical or mission-critical applications
                 mandates the availability of methods to validate the
                 system dependability across the whole design flow. In
                 this article we introduce a fault injection approach,
                 based on loadable kernel modules and running under the
                 Linux operating system, which can be adopted as soon as
                 a running prototype of the systems is available.
                 Moreover, for the purpose of decoupling dependability
                 analysis from hardware availability, we also propose
                 the adoption of hardware virtualization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mohan:2010:PTA,
  author =       "Sibin Mohan and Frank Mueller and Michael Root and
                 William Hawkins and Christopher Healy and David Whalley
                 and Emilio Vivancos",
  title =        "Parametric timing analysis and its application to
                 dynamic voltage scaling",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "25:1--25:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880061",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems with real-time constraints depend on
                 a priori knowledge of worst-case execution times
                 (WCETs) to determine if tasks meet deadlines. Static
                 timing analysis derives bounds on WCETs but requires
                 statically known loop bounds. This work removes the
                 constraint on known loop bounds through parametric
                 analysis expressing WCETs as functions. Tighter WCETs
                 are dynamically discovered to exploit slack by dynamic
                 voltage scaling (DVS) saving 60\% to 82\% energy over
                 DVS-oblivious techniques and showing savings close to
                 more costly dynamic-priority DVS algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhu:2010:RAD,
  author =       "Dakai Zhu",
  title =        "Reliability-aware dynamic energy management in
                 dependable embedded real-time systems",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "26:1--26:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880062",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent studies show that voltage scaling, which is an
                 efficient energy management technique, has a direct and
                 negative effect on system reliability because of the
                 increased rate of transient faults (e.g., those induced
                 by cosmic particles). In this article, we propose
                 energy management schemes that explicitly take system
                 reliability into consideration. The proposed
                 reliability-aware energy management schemes dynamically
                 schedule recoveries for tasks to be scaled down to
                 recuperate the reliability loss due to energy
                 management. Based on the amount of available slack, the
                 application size, and the fault rate changes, we
                 analyze when it is profitable to reclaim the slack for
                 energy savings without sacrificing system
                 reliability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ramaprasad:2010:TBF,
  author =       "Harini Ramaprasad and Frank Mueller",
  title =        "Tightening the bounds on feasible preemptions",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "27:1--27:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880063",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Data caches are an increasingly important
                 architectural feature in most modern computer systems.
                 They help bridge the gap between processor speeds and
                 memory access times. One inherent difficulty of using
                 data caches in a real-time system is the
                 unpredictability of memory accesses, which makes it
                 difficult to calculate worst-case execution times
                 (WCETs) of real-time tasks. While cache analysis for
                 single real-time tasks has been the focus of much
                 research in the past, bounding the preemption delay in
                 a multitask preemptive environment is a challenging
                 problem, particularly for data caches. This article
                 makes multiple contributions in the context of
                 independent, periodic tasks with deadlines less than or
                 equal to their periods executing on a single
                 processor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2010:SMA,
  author =       "Lian Li and Jingling Xue and Jens Knoop",
  title =        "Scratchpad memory allocation for data aggregates via
                 interval coloring in superperfect graphs",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "28:1--28:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880064",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Existing methods place data or code in scratchpad
                 memory (SPM) by relying on heuristics or resorting to
                 integer programming or mapping it to a graph-coloring
                 problem. In this article, the SPM allocation problem
                 for arrays is formulated as an interval coloring
                 problem. The key observation is that in many embedded C
                 programs, two arrays can be modeled such that either
                 their live ranges do not interfere or one contains the
                 other (with good accuracy). As a result, array
                 interference graphs often form a special class of
                 superperfect graphs (known as comparability graphs),
                 and their optimal interval colorings become efficiently
                 solvable. This insight has led to the development of an
                 SPM allocation algorithm that places arrays in an
                 interference graph in SPM by examining its maximal
                 cliques.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Singh:2010:CPD,
  author =       "Montek Singh and Steven M. Nowick",
  title =        "Call for papers: {Deadline: March 15, 2011}",
  journal =      j-TECS,
  volume =       "10",
  number =       "2",
  pages =        "29:1--29:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1880050.1880065",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jan 10 09:44:12 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{West:2011:ASS,
  author =       "Richard West and Gabriel Parmer",
  title =        "Application-specific service technologies for
                 commodity operating systems in real-time environments",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "30:1--30:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952523",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In order to eliminate the costs of proprietary systems
                 and special purpose hardware, many real-time and
                 embedded computing platforms are being built on
                 commodity operating systems and generic hardware.
                 Unfortunately, many such systems are ill-suited to the
                 low-latency and predictable timing requirements of
                 real-time applications. This article, therefore,
                 focuses on application-specific service technologies
                 for low-cost commodity operating systems and hardware,
                 so that real-time service guarantees can be met. We
                 describe contrasting methods to deploy first-class
                 services on commodity systems that are dispatched with
                 low latency and execute asynchronously according to
                 bounds on CPU, memory, and I/O device usage.
                 Specifically, we present a ``user-level sandboxing''
                 (ULS) mechanism that relies on hardware protection to
                 isolate application-specific services from the core
                 kernel.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2011:NBF,
  author =       "Xue Liu and Tarek Abdelzaher",
  title =        "Nonutilization bounds and feasible regions for
                 arbitrary fixed-priority policies",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952524",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Prior research on schedulability bounds focused
                 primarily on bounding utilization as a means to meet
                 deadline constraints. Nontrivial bounds were found for
                 a handful of scheduling policies in which utilization
                 is directly related to the ability of the policy to
                 meet deadlines. Examples include rate-monotonic,
                 deadline-monotonic, and EDF scheduling. For most other
                 scheduling policies, however, utilization is not
                 correlated with schedulability. For example,
                 shortest-job-first can miss deadlines at an arbitrarily
                 low utilization. This raises the question of whether or
                 not some other nonutilization-based metric might be
                 more indicative of schedulability in those cases. This
                 article answers the above question positively by
                 extending the notion of schedulability bounds, in a
                 uniform manner, to arbitrary (fixed) priorities and
                 nonutilization metrics.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nair:2011:EHB,
  author =       "Ajay Nair and Karthik Shankar and Roman Lysecky",
  title =        "Efficient hardware-based nonintrusive dynamic
                 application profiling",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "32:1--32:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952525",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Application profiling---the process of monitoring an
                 application to determine the frequency of execution
                 within specific regions---is an essential step within
                 the design process for many software and hardware
                 systems. Profiling is often a critical step within
                 hardware/software partitioning utilized to determine
                 the critical kernels of an application. In this
                 article, we present an innovative, nonintrusive dynamic
                 application profiler (DAProf) capable of profiling an
                 executing application by monitoring the application's
                 short backward branches, function calls, and function
                 returns. The resulting profile information provides an
                 accurate characterization of the frequently executed
                 loops within the application providing a breakdown of
                 loop executions versus loop iterations per execution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Aaraj:2011:FDE,
  author =       "Najwa Aaraj and Anand Raghunathan and Niraj K. Jha",
  title =        "A framework for defending embedded systems against
                 software attacks",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952526",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The incidence of malicious code and software
                 vulnerability exploits on embedded platforms is
                 constantly on the rise. Yet, little effort is being
                 devoted to combating such threats to embedded systems.
                 Moreover, adapting security approaches designed for
                 general-purpose systems generally fails because of the
                 limited processing capabilities of their embedded
                 counterparts. In this work, we evaluate a malware and
                 software vulnerability exploit defense framework for
                 embedded systems. The proposed framework extends our
                 prior work, which defines two isolated execution
                 environments: a testing environment, wherein an
                 untrusted application is first tested using dynamic
                 binary instrumentation (DBI), and a real environment,
                 wherein a program is monitored at runtime using an
                 extracted behavioral model, along with a continuous
                 learning process.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Berendsen:2011:FSA,
  author =       "Jasper Berendsen and Biniam Gebremichael and Frits W.
                 Vaandrager and Miaomiao Zhang",
  title =        "Formal specification and analysis of {Zeroconf} using
                 {Uppaal}",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "34:1--34:32",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952527",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The model checker Uppaal is used to formally model and
                 analyze parts of Zeroconf, a protocol for dynamic
                 configuration of IPv4 link-local addresses that has
                 been defined in RFC 3927 of the IETF. Our goal has been
                 to construct a model that (a) is easy to understand by
                 engineers, (b) comes as close as possible to the
                 informal text (for each transition in the model there
                 should be a corresponding piece of text in the RFC),
                 and (c) may serve as a basis for formal verification.
                 Our modeling efforts revealed several errors (or at
                 least ambiguities) in the RFC that no one else spotted
                 before.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ykman-Couvreur:2011:FMM,
  author =       "Ch. Ykman-Couvreur and V. Nollet and F. Catthoor and
                 H. Corporaal",
  title =        "Fast multidimension multichoice knapsack heuristic for
                 {MP-SoC} runtime management",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952528",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Since the application complexity is growing and
                 applications can be dynamically activated, the major
                 challenge for heterogeneous multiprocessor platforms is
                 to select at runtime an energy-efficient mapping of
                 these applications. Taking into account that many
                 different possible implementations per application can
                 be available, and that the selection must meet the
                 application deadlines under the available platform
                 resources, this runtime optimization problem can be
                 modeled as a Multidimension Multichoice Knapsack
                 Problem (MMKP), which is known to be NP-hard. Not only
                 algorithms for an optimal solution, but also
                 state-of-the-art heuristics for real-time systems are
                 still too slow for runtime management of multiprocessor
                 platforms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ragel:2011:HHS,
  author =       "Roshan G. Ragel and Sri Parameswaran",
  title =        "A hybrid hardware--software technique to improve
                 reliability in embedded processors",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952529",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Numerous methods have been described in research
                 literature with methods to improve reliability of
                 processors by the use of control-flow checking. High
                 performance and code-size penalties cripple the
                 proposed software approaches, while hardware approaches
                 are not scalable and are thus rarely implemented in
                 real embedded systems. In this article, we show that by
                 including control-flow checking as an issue to be
                 considered when designing an embedded processor, we are
                 able to reduce overheads considerably and still provide
                 a scalable solution to this problem. The technique
                 described in this article includes architectural
                 improvements to the processor and binary rewriting of
                 the application.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huynh:2011:EAR,
  author = {Johnny Huynh and Jos{\'e} Nelson Amaral and Paul
    Berube and Sid-Ahmed-Ali Touati},
  title = {Evaluating address register assignment and offset
    assignment algorithms},
  journal = j-TECS,
  volume = {10},
  number = {3},
  pages = {37:1--37:??},
  articleno = {37},
  month = apr,
  year = {2011},
  DOI = {https://doi.org/10.1145/1952522.1952530},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon May 2 10:07:27 MDT 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {In digital signal processors (DSPs), variables are
    accessed using $k$ address registers. The problem of
    finding a memory layout, for a set of variables, that
    minimizes the address-computation overhead is known as
    the General Offset Assignment (GOA) problem. The most
    common approach to this problem is to partition the set
    of variables into $k$ partitions and to assign each
    partition to an address register. Thus, effectively
    decomposing the GOA problem into several Simple Offset
    Assignment (SOA) problems. Many heuristic-based
    algorithms are proposed in the literature to
    approximate solutions to both the variable partitioning
    and the SOA problems. However, the address-computation
    overhead of the resulting memory layouts are not
    accurately evaluated.},
}

%%% TECS 10(3), article 38 (April 2011).
@Article{Diguet:2011:CLB,
  author =       "Jean-Philippe Diguet and Yvan Eustache and Guy
                 Gogniat",
  title =        "Closed-loop--based self-adaptive {Hardware\slash
                 Software-Embedded} systems: Design methodology and
                 smart {CAM} case study",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "38:1--38:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1952522.1952531",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents our methodology for implementing
                 self-adaptiveness within an OS-based and reconfigurable
                 embedded system according to objectives such as quality
                 of service, performance, or power consumption. We
                 detail our approach to separate application-specific
                 decisions and hardware\slash software-implementation
                 decisions at system level. The former are related to
                 the efficiency control of applications and based on the
                 knowledge of application engineers. The latter are
                 generic and address the choice between various hardware
                 and software implementations according to user
                 objectives. The decision management is implemented as
                 an adaptive closed-loop model. We describe how each
                 design step may be implemented and especially how we
                 solved the issue of stability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gamatie:2011:MDD,
  author = {Abdoulaye Gamati{\'e} and S{\'e}bastien {Le Beux} and
    {\'E}ric Piel and Rabie {Ben Atitallah} and Anne Etien
    and Philippe Marquet and Jean-Luc Dekeyser},
  title = {A Model-Driven Design Framework for Massively Parallel
    Embedded Systems},
  journal = j-TECS,
  volume = {10},
  number = {4},
  pages = {39:1--39:??},
  articleno = {39},
  month = nov,
  year = {2011},
  DOI = {https://doi.org/10.1145/2043662.2043663},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Dec 19 15:49:06 MST 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Modern embedded systems integrate more and more
    complex functionalities. At the same time, the
    semiconductor technology advances enable to increase
    the amount of hardware resources on a chip for the
    execution. Massively parallel embedded systems
    specifically deal with the optimized usage of such
    hardware resources to efficiently execute their
    functionalities. The design of these systems mainly
    relies on the following challenging issues: first, how
    to deal with the parallelism in order to increase the
    performance; second, how to abstract their
    implementation details in order to manage their
    complexity; third, how to refine these abstract
    representations in order to produce efficient
    implementations.},
}

@Article{Kim:2011:DPT,
  author = {Seungkyun Kim and Kiwon Kwon and Chihun Kim and
    Choonki Jang and Jaejin Lee and Sang Lyul Min},
  title = {Demand Paging Techniques for Flash Memory Using
    Compiler Post-Pass Optimizations},
  journal = j-TECS,
  volume = {10},
  number = {4},
  pages = {40:1--40:??},
  articleno = {40},
  month = nov,
  year = {2011},
  DOI = {https://doi.org/10.1145/2043662.2043664},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Dec 19 15:49:06 MST 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {In this article, we propose an application-specific
    demand paging mechanism for low-end embedded systems
    that have flash memory as secondary storage. These
    systems are not equipped with virtual memory. A small
    memory space called an execution buffer is used to page
    the code of an application. An application-specific
    page manager manages the buffer. The page manager is
    automatically generated by a compiler post-pass
    optimizer and combined with the application image. The
    post-pass optimizer analyzes the executable image and
    transforms function call/return instructions into calls
    to the page manager. As a result, each function in the
    code can be loaded into the memory on demand at
    runtime.},
}

@Article{Dini:2011:LLA,
  author = {Gianluca Dini and Ida M. Savino},
  title = {{LARK}: a Lightweight Authenticated {ReKeying} Scheme
    for Clustered Wireless Sensor Networks},
  journal = j-TECS,
  volume = {10},
  number = {4},
  pages = {41:1--41:??},
  articleno = {41},
  month = nov,
  year = {2011},
  DOI = {https://doi.org/10.1145/2043662.2043665},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Dec 19 15:49:06 MST 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Group communication has proven a powerful paradigm for
    designing applications and services in Wireless Sensor
    Networks (WSNs). Given the tight interaction between
    WSNs and the physical world, a security infringement
    may translate into a safety infringement. Therefore, in
    order to fully exploit the group communication paradigm
    we need to secure it. Traditionally, this requirement
    has been formalized in terms of backward and forward
    security and fulfilled by means of rekeying. In WSNs,
    group rekeying becomes particularly a complex problem
    because communication takes place over an easily
    accessible wireless medium and because sensor nodes
    have severe limitations in terms of computing, storage,
    energy, and tamper-resistance capabilities for cost
    reasons.},
}

@Article{Schoeberl:2011:HAL,
  author = {Martin Schoeberl and Stephan Korsholm and Tomas
    Kalibera and Anders P. Ravn},
  title = {A Hardware Abstraction Layer in {Java}},
  journal = j-TECS,
  volume = {10},
  number = {4},
  pages = {42:1--42:??},
  articleno = {42},
  month = nov,
  year = {2011},
  DOI = {https://doi.org/10.1145/2043662.2043666},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Dec 19 15:49:06 MST 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Embedded systems use specialized hardware devices to
    interact with their environment, and since they have to
    be dependable, it is attractive to use a modern,
    type-safe programming language like Java to develop
    programs for them. Standard Java, as a
    platform-independent language, delegates access to
    devices, direct memory access, and interrupt handling
    to some underlying operating system or kernel, but in
    the embedded systems domain resources are scarce and a
    Java Virtual Machine (JVM) without an underlying
    middleware is an attractive architecture. The
    contribution of this article is a proposal for Java
    packages with hardware objects and interrupt handlers
    that interface to such a JVM.},
}

@Article{Gilroy:2011:RHA,
  author = {Michael Gilroy and James Irvine and Robert Atkinson},
  title = {{RAID 6} Hardware Acceleration},
  journal = j-TECS,
  volume = {10},
  number = {4},
  pages = {43:1--43:??},
  articleno = {43},
  month = nov,
  year = {2011},
  DOI = {https://doi.org/10.1145/2043662.2043667},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Dec 19 15:49:06 MST 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Inexpensive, reliable hard disk storage is
    increasingly required in both businesses and the home.
    As disk capacities increase and multiple drives are
    combined in one system the probability of multiple disk
    failures increases. Through the adoption of RAID 6 the
    capability to recover from up to two simultaneous disk
    failures becomes available. In this article, we present
    three different RAID 6 implementations each tailored to
    support different target applications and optimized to
    reduce overall hardware resource utilization. We
    present an optimal Reed-Solomon-based RAID 6
    implementation for arrays of four disks. We also
    present the smallest in terms of hardware resource
    utilization as well having the highest throughput RAID
    6 hardware solution for disk arrays of up to 15
    drives.},
}

@Article{Zhuang:2011:CST,
  author = {Xiaotong Zhuang and Santosh Pande},
  title = {Compiler-Supported Thread Management for Multithreaded
    Network Processors},
  journal = j-TECS,
  volume = {10},
  number = {4},
  pages = {44:1--44:??},
  articleno = {44},
  month = nov,
  year = {2011},
  DOI = {https://doi.org/10.1145/2043662.2043668},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Dec 19 15:49:06 MST 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Traditionally, runtime management involving CPU
    sharing, real-time scheduling, etc., is provided by the
    runtime environment (typically an operating system)
    using hardware support such as timers and interrupts.
    However, due to stringent performance requirements on
    network processors, neither OS nor hardware mechanisms
    are typically feasible/available. Mapping packet
    processing tasks on network processors involves complex
    trade-offs to maximize parallelism and pipelining. Due
    to an increase in the size of the code store and
    complexity of application requirements, network
    processors are being programmed with heterogeneous
    threads that may execute code belonging to different
    tasks on a given micro-engine. Also, most network
    applications are streaming applications that are
    typically processed in a pipelined fashion.},
}

@Article{Stuart:2011:RRN,
  author = {Matthias Bo Stuart and Mikkel Bystrup Stensgaard and
    Jens Spars{\o}},
  title = {The {ReNoC} Reconfigurable {Network-on-Chip}:
    Architecture, Configuration Algorithms, and
    Evaluation},
  journal = j-TECS,
  volume = {10},
  number = {4},
  pages = {45:1--45:??},
  articleno = {45},
  month = nov,
  year = {2011},
  DOI = {https://doi.org/10.1145/2043662.2043669},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Dec 19 15:49:06 MST 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {This article presents a reconfigurable network-on-chip
    architecture called ReNoC, which is intended for use in
    general-purpose multiprocessor system-on-chip
    platforms, and which enables application-specific
    logical NoC topologies to be configured, thus providing
    both efficiency and flexibility. The article presents
    three novel algorithms that synthesize an
    application-specific NoC topology, map it onto the
    physical ReNoC architecture, and create deadlock-free,
    application-specific routing algorithms. We apply our
    algorithms to a mixture of real and synthetic
    applications and target three different physical
    architectures. Compared to a conventional NoC, ReNoC
    reduces power consumption by up to 58\% on average.},
}

@Article{Cucinotta:2011:RMA,
  author = {Tommaso Cucinotta and Luca Abeni and Luigi Palopoli
    and Giuseppe Lipari},
  title = {A Robust Mechanism for Adaptive Scheduling of
    Multimedia Applications},
  journal = j-TECS,
  volume = {10},
  number = {4},
  pages = {46:1--46:??},
  articleno = {46},
  month = nov,
  year = {2011},
  DOI = {https://doi.org/10.1145/2043662.2043670},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Dec 19 15:49:06 MST 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {We propose an adaptive scheduling technique to
    schedule highly dynamic multimedia tasks on a CPU. We
    use a combination of two techniques: the first one is a
    feedback mechanism to track the resource requirements
    of the tasks based on ``local'' observations. The
    second one is a mechanism that operates with a
    ``global'' visibility, reclaiming unused bandwidth. The
    combination proves very effective: resource reclaiming
    increases the robustness of the feedback, while the
    identification of the correct bandwidth made by the
    feedback increases the effectiveness of the
    reclamation. We offer both theoretical results and an
    extensive experimental validation of the approach.},
}

%%% TECS 10(4), article 47 (November 2011).
%%% The compound surname ``Dupont de Dinechin'' is braced so that BibTeX
%%% does not parse ``de'' as a von-particle and ``Dupont'' as a given name.
@Article{Touati:2011:ESR,
  author =       "Sid-Ahmed-Ali Touati and Frederic Brault and Karine
                 Deschinkel and Beno{\^\i}t {Dupont de Dinechin}",
  title =        "Efficient Spilling Reduction for Software Pipelined
                 Loops in Presence of Multiple Register Types in
                 Embedded {VLIW} Processors",
  journal =      j-TECS,
  volume =       "10",
  number =       "4",
  pages =        "47:1--47:??",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2043662.2043671",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 19 15:49:06 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Integrating register allocation and software
                 pipelining of loops is an active research area. We
                 focus on techniques that precondition the dependence
                 graph before software pipelining in order to ensure
                 that no register spill instructions are inserted by the
                 register allocator in the software pipelined loop. If
                 spilling is not necessary for the input code,
                 preconditioning techniques insert dependence arcs so
                 that the maximum register pressure MAXLIVE achieved by
                 any loop schedule is below the number of available
                 registers, without hurting the initiation interval if
                 possible. When a solution exists, a spill-free software
                 pipeline is guaranteed to exist. Existing
                 preconditioning techniques consider one register type
                 (register class) at a time [Deschinkel and Touati
                 2008].",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhou:2011:ARA,
  author = {Gang Zhou and Qiang Li and Jingyuan Li and Yafeng Wu
    and Shan Lin and Jian Lu and Chieh-Yih Wan and Mark D.
    Yarvis and John A. Stankovic},
  title = {Adaptive and Radio-Agnostic {QoS} for Body Sensor
    Networks},
  journal = j-TECS,
  volume = {10},
  number = {4},
  pages = {48:1--48:??},
  articleno = {48},
  month = nov,
  year = {2011},
  DOI = {https://doi.org/10.1145/2043662.2043672},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Dec 19 15:49:06 MST 2011},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {As wireless devices and sensors are increasingly
    deployed on people, researchers have begun to focus on
    wireless body-area networks. Applications of wireless
    body sensor networks include healthcare, entertainment,
    and personal assistance, in which sensors collect
    physiological and activity data from people and their
    environments. In these body sensor networks, quality of
    service is needed to provide reliable data
    communication over prioritized data streams. This
    article proposes BodyQoS, the first running QoS system
    demonstrated on an emulated body sensor network.
    BodyQoS adopts an asymmetric architecture, in which
    most processing is done on a resource-rich aggregator,
    minimizing the load on resource-limited sensor nodes.},
}

%%% TECS 11(1), article 1 (March 2012).
@Article{Wandeler:2012:UGS,
  author =       "Ernesto Wandeler and Alexander Maxiaguine and Lothar
                 Thiele",
  title =        "On the use of greedy shapers in real-time embedded
                 systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2146417.2146418",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Apr 2 17:42:24 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Traffic shaping is a well-known technique in the area
                 of networking and is proven to reduce global buffer
                 requirements and end-to-end delays in networked
                 systems. Due to these properties, shapers also play an
                 increasingly important role in the design of
                 multiprocessor embedded systems that exhibit a
                 considerable amount of on-chip traffic. Despite the
                 growing importance of traffic shaping in this area, no
                 methods exist for analyzing shapers in distributed
                 embedded systems and for incorporating them into a
                 system-level performance analysis. Until now it was not
                 possible to determine the effect of shapers on
                 end-to-end delay guarantees or buffer requirements in
                 such systems. In this work, we present a method for
                 analyzing greedy shapers, and we embed this analysis
                 method into a well-established modular performance
                 analysis framework for real-time embedded systems. The
                 presented approach enables system-level performance
                 analysis of complete systems with greedy shapers, and
                 we prove its applicability by analyzing three case
                 study systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hamers:2012:EMS,
  author = {Juan Hamers and Lieven Eeckhout},
  title = {Exploiting media stream similarity for
    energy-efficient decoding and resource prediction},
  journal = j-TECS,
  volume = {11},
  number = {1},
  pages = {2:1--2:??},
  articleno = {2},
  month = mar,
  year = {2012},
  DOI = {https://doi.org/10.1145/2146417.2146419},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Apr 2 17:42:24 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {This article introduces a novel approach to
    energy-efficient media stream decoding that is based on
    the notion of media stream similarity. The key idea is
    that platform-independent scenarios with similar
    decoding complexity can be identified within and across
    media streams. A device that decodes a media stream
    annotated with scenario information can then adjust its
    processor clock frequency and voltage level based on
    these scenarios for lower energy consumption. Our
    evaluation, done using the H.264 AVC decoder and 12
    reference video streams, shows an average energy
    reduction of 44\% while missing less than 0.2\% of the
    frame deadlines using scenario-driven video decoding.
    An additional application of scenario-based media
    stream annotation is to predict required resources
    (compute power and energy) for consuming a given
    service on a given device. Resource prediction is
    extremely useful in a client-server setup in which the
    client requests a media service from the server or
    content provider. The content provider (in cooperation
    with the client) can then determine what service
    quality to deliver, given the client's available
    resources. Scenario-aware resource prediction can
    predict (compute power and energy) consumption with
    errors less than 4\% (and an overall average 1.4\%
    error).},
}

@Article{Zhong:2012:WSN,
  author = {Ziguo Zhong and Tian He},
  title = {Wireless sensor node localization by multisequence
    processing},
  journal = j-TECS,
  volume = {11},
  number = {1},
  pages = {3:1--3:??},
  articleno = {3},
  month = mar,
  year = {2012},
  DOI = {https://doi.org/10.1145/2146417.2146420},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  journal-URL = {https://dl.acm.org/loi/tecs},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  bibdate = {Mon Apr 2 17:42:24 MDT 2012},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Wireless Sensor Networks have been proposed for use in
    many location-dependent applications. Most of these
    need to identify the locations of sensor nodes, a
    challenging task because of severe constraints on cost,
    energy and effective range of sensor devices. To
    overcome limitations in existing solutions, we present
    a Multi-Sequence Positioning (MSP) method for
    large-scale stationary sensor node localization in
    outdoor environments. The novel idea behind MSP is to
    reconstruct and estimate two-dimensional location
    information for each sensor node by processing multiple
    one-dimensional node sequences, easily obtained through
    loosely guided event distribution. Starting from a
    basic MSP design, we propose four optimizations that
    work together to increase localization accuracy. We
    address several interesting issues such as incomplete
    (partial) node sequences and sequence flip, found in
    the Mirage test-bed we built. We have evaluated the MSP
    system through theoretical analysis, extensive
    simulation as well as two physical systems (an indoor
    version with 46 MICAz motes and an outdoor version with
    20 MICAz motes). Evaluation demonstrates that MSP can
    achieve an accuracy within one foot, requiring neither
    additional costly hardware on sensor nodes nor precise
    event distribution. In fact, it provides a nice
    tradeoff between physical cost (anchors) and soft cost
    (events) while maintaining localization accuracy.},
}

@Article{Peng:2012:BHA,
  author          = {Chunyi Peng and Guobin Shen and Yongguang Zhang},
  title           = {{BeepBeep}: a high-accuracy acoustic-based system for
                     ranging and localization using {COTS} devices},
  journal         = j-TECS,
  volume          = {11},
  number          = {1},
  pages           = {4:1--4:??},
  month           = mar,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2146417.2146421},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Apr 2 17:42:24 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We present the design and implementation of BeepBeep,
                     a high-accuracy acoustic-based system for ranging and
                     localization. It is a pure software-based solution and
                     uses the most basic set of commodity hardware --- a
                     speaker, a microphone, and some form of interdevice
                     communication. The ranging scheme works without any
                     infrastructure and is applicable to sensor platforms
                     and commercial-off-the-shelf mobile devices. It
                     achieves high accuracy through three techniques:
                     two-way sensing, self-recording, and sample counting.
                     We further devise a scalable and fast localization
                     scheme. Our experiments show that up to one-centimeter
                     ranging accuracy and three-centimeter localization
                     accuracy can be achieved.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {4},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Kumar:2012:CMA,
  author          = {T. S. Rajesh Kumar and R. Govindarajan and C. P.
                     Ravikumar},
  title           = {On-chip memory architecture exploration framework for
                     {DSP} processor-based embedded system on chip},
  journal         = j-TECS,
  volume          = {11},
  number          = {1},
  pages           = {5:1--5:??},
  month           = mar,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2146417.2146422},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Apr 2 17:42:24 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Today's SoCs are complex designs with multiple
                     embedded processors, memory subsystems, and application
                     specific peripherals. The memory architecture of
                     embedded SoCs strongly influences the power and
                     performance of the entire system. Further, the memory
                     subsystem constitutes a major part (typically up to
                     70\%) of the silicon area for the current day SoC. In
                     this article, we address the on-chip memory
                     architecture exploration for DSP processors which are
                     organized as multiple memory banks, where banks can be
                     single/dual ported with non-uniform bank sizes. In this
                     paper we propose two different methods for physical
                     memory architecture exploration and identify the
                     strengths and applicability of these methods in a
                     systematic way. Both methods address the memory
                     architecture exploration for a given target application
                     by considering the application's data access
                     characteristics and generates a set of Pareto-optimal
                     design points that are interesting from a power,
                     performance and VLSI area perspective. To the best of
                     our knowledge, this is the first comprehensive work on
                     memory space exploration at physical memory level that
                     integrates data layout and memory exploration to
                     address the system objectives from both hardware design
                     and application software development perspective.
                     Further we propose an automatic framework that explores
                     the design space identifying 100's of Pareto-optimal
                     design points within a few hours of running on a
                     standard desktop configuration.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {5},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Pande:2012:PDP,
  author          = {Amit Pande and Joseph Zambreno},
  title           = {{Poly-DWT}: {Polymorphic} wavelet hardware support for
                     dynamic image compression},
  journal         = j-TECS,
  volume          = {11},
  number          = {1},
  pages           = {6:1--6:??},
  month           = mar,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2146417.2146423},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Apr 2 17:42:24 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Many modern computing applications have been enabled
                     through the use of real-time multimedia processing.
                     While several hardware architectures have been proposed
                     in the research literature to support such primitives,
                     these fail to address applications whose performance
                     and resource requirements have a dynamic aspect.
                     Embedded multimedia systems typically need a power and
                     computation efficient design in addition to good
                     compression performance. In this article, we introduce
                     a Polymorphic Wavelet Architecture (Poly-DWT) as a
                     crucial building block towards the development of
                     embedded systems to address such challenges. We
                     illustrate how our Poly-DWT architecture can
                     potentially make dynamic resource allocation decisions,
                     such as the internal bit representation and the
                     processing kernel, according to the application
                     requirements. We introduce a filter switching
                     architecture that allows for dynamic switching between
                     5/3 and 9/7 wavelet filters and leads to a more power
                     efficient design. Further, a multiplier-free design
                     with a low adder requirement demonstrates the potential
                     of Poly-DWT for embedded systems. Through an FPGA
                     prototype, we perform a quantitative analysis of our
                     Poly-DWT architecture, and compare our filter to
                     existing approaches to illustrate the area and
                     performance benefits inherent in our approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {6},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Seo:2012:RGV,
  author          = {Suk-Hyun Seo and Jin-Ho Kim and Sung-Ho Hwang and Key
                     Ho Kwon and Jae Wook Jeon},
  title           = {A reliable gateway for in-vehicle networks based on
                     {LIN}, {CAN}, and {FlexRay}},
  journal         = j-TECS,
  volume          = {11},
  number          = {1},
  pages           = {7:1--7:??},
  month           = mar,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2146417.2146424},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Apr 2 17:42:24 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article describes a reliable gateway for
                     in-vehicle networks. Such networks include local
                     interconnect networks, controller area networks, and
                     FlexRay. There is some latency when transferring a
                     message from one node (source) to another node
                     (destination). A high probability of error exists due
                     to different protocol specifications such as baud-rate,
                     and message frame format. Therefore, deploying a
                     reliable gateway is a challenge to the automotive
                     industry. We propose a reliable gateway based on the
                     OSEK/VDX components for in-vehicle networks. We also
                     examine the gateway system developed, and then we
                     evaluate the performance of our proposed system.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {7},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Huang:2012:EFP,
  author          = {Kai Huang and Wolfgang Haid and Iuliana Bacivarov and
                     Matthias Keller and Lothar Thiele},
  title           = {Embedding formal performance analysis into the design
                     cycle of {MPSoCs} for real-time streaming
                     applications},
  journal         = j-TECS,
  volume          = {11},
  number          = {1},
  pages           = {8:1--8:??},
  month           = mar,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2146417.2146425},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Apr 2 17:42:24 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Modern real-time streaming applications are
                     increasingly implemented on multiprocessor
                     systems-on-chip (MPSoC). The implementation, as well as
                     the verification of real-time applications executing on
                     MPSoCs, are difficult tasks, however. A major challenge
                     is the performance analysis of MPSoCs, which is
                     required for early design space exploration and final
                     system verification. Simulation-based methods are not
                     well-suited for this purpose, due to long runtimes and
                     non-exhaustive corner-case coverage. To overcome these
                     limitations, formal performance analysis methods that
                     provide guarantees for meeting real-time constraints
                     have been developed. Embedding formal performance
                     analysis into the MPSoC design cycle requires the
                     generation of a faithful analysis model and its
                     calibration with the system-specific parameters. In
                     this article, a design flow that automates these steps
                     is presented. In particular, we integrate modular
                     performance analysis (MPA) into the distributed
                     operation layer (DOL) MPSoC programming environment.
                     The result is an MPSoC software design flow that allows
                     for automatically generating the system implementation,
                     together with an analysis model for system
                     verification.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {8},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Chang:2012:AFS,
  author          = {Yuan-Hao Chang and Po-Liang Wu and Tei-Wei Kuo and
                     Shih-Hao Hung},
  title           = {An adaptive file-system-oriented {FTL} mechanism for
                     flash-memory storage systems},
  journal         = j-TECS,
  volume          = {11},
  number          = {1},
  pages           = {9:1--9:??},
  month           = mar,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2146417.2146426},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Apr 2 17:42:24 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {As flash memory becomes popular over various
                     platforms, there is a strong demand regarding the
                     performance degradation problem, due to the special
                     characteristics of flash memory. This research proposes
                     the design of a file-system-oriented flash translation
                     layer, in which a filter mechanism is designed to
                     separate the access requests of file-system metadata
                     and file contents for better performance. A recovery
                     scheme is then proposed for maintaining the integrity
                     of a file system. The proposed flash translation layer
                     is implemented as a Linux device driver and evaluated
                     with respect to ext2 and ext3 file systems. Experiments
                     were also done over NTFS by a series of realistic
                     traces. The experimental results show significant
                     performance improvement over ext2, ext3, and NTFS file
                     systems with limited system overheads.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {9},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Li:2012:SRS,
  author          = {Chunxiao Li and Niraj K. Jha and Anand Raghunathan},
  title           = {Secure reconfiguration of software-defined radio},
  journal         = j-TECS,
  volume          = {11},
  number          = {1},
  pages           = {10:1--10:??},
  month           = mar,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2146417.2146427},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Apr 2 17:42:24 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Software-defined radio (SDR) implements a radio system
                     in software that executes on a programmable processor.
                     The components of SDR, such as the filters, amplifiers,
                     and modulators, can be easily reconfigured to adapt to
                     the operating environment and user preferences.
                     However, the flexibility of radio reconfiguration
                     brings along the serious security concern of malicious
                     modification of software in the SDR system, leading to
                     radio malfunction and interference with other users'
                     communications. Both the SDR device and the network
                     need to be protected from such malicious radio
                     reconfiguration. In this article, a new architecture
                     targeted at protecting SDR devices from malicious
                     reconfiguration is proposed. The architecture is based
                     on robust separation of the radio operation environment
                     and user application environment, through the use of
                     virtualization. A new radio middleware layer is
                     designed to securely intercept all attempts to
                     reconfigure the radio, and a security policy monitor
                     checks the target configuration against security
                     policies that represent the interests of various
                     parties. Even if the operating system in the user
                     application environment is compromised, the proposed
                     architecture can ensure secure reconfiguration in the
                     radio operation environment. We have prototyped the
                     proposed secure SDR architecture using VMware and the
                     GNU Radio toolkit and demonstrate that overheads
                     incurred by the architecture are small and tolerable.
                     Therefore, we believe that the proposed solution could
                     be applied to address secure SDR reconfiguration in
                     both general-purpose and embedded computing systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {10},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Berekovic:2012:ISS,
  author =       "Mladen Berekovic and Samarjit Chakraborty and Petru
                 Eles and Andy D. Pimentel",
  title =        "Introduction to the {Special Section on
                 ESTIMedia'08}",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180891",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhu:2012:PAR,
  author =       "Jun Zhu and Ingo Sander and Axel Jantsch",
  title =        "Performance Analysis of Reconfigurations in Adaptive
                 Real-Time Streaming Applications",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180888",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We propose a performance analysis framework for
                 adaptive real-time synchronous data flow streaming
                 applications on runtime reconfigurable FPGAs. As the
                 main contribution, we present a constraint based
                 approach to capture both streaming application
                 execution semantics and the varying design concerns
                 during reconfigurations. With our event models
                 constructed as cumulative functions on data streams, we
                 exploit a novel compile-time analysis framework based
                 on iterative timing phases. Finally, we implement our
                 framework on a public domain constraint solver, and
                 illustrate its capabilities in the analysis of design
                 trade-offs due to reconfigurations with experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hsieh:2012:PBP,
  author =       "Kun-Yuan Hsieh and Chi-Hua Lai and Shang-Hong Lai and
                 Jenq Kuen Lee",
  title =        "Parallelization of Belief Propagation on {Cell}
                 Processors for Stereo Vision",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180889",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Markov random field models provide a robust
                 formulation for the stereo vision problem of inferring
                 three-dimensional scene geometry from two images taken
                 from different viewpoints. One of the most advanced
                 algorithms for solving the associated energy
                 minimization problem in the formulation is belief
                 propagation (BP). Although BP provides very accurate
                 results in solving stereo vision problems, the high
                 computational cost of the algorithm hinders it from
                 real-time applications. In recent years, multicore
                 architectures have been widely adopted in various
                 industrial application domains. The high computing
                 power of multicore processors provides new
                 opportunities to implement stereo vision algorithms.
                 This article examines and extracts the parallelisms in
                 the BP method for stereo vision on multicore
                 processors. This article shows that parallelism of the
                 algorithm can be efficiently utilized on multicore
                 processors. The results show that parallelization on
                 multicore processors provides a speedup for the BP
                 algorithm of almost 15 times compared to the
                 single-processor implementation on the PPE of the Cell
                 BE. The experimental results also indicate that a frame
                 rate of 6.5 frames/second is possible when implementing
                 the parallelized BP algorithm on the multicore
                 processor of Cell BE with one PPE and six SPEs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Terechko:2012:BPS,
  author =       "Andrei Terechko and Jan Hoogerbrugge and Ghiath Alkadi
                 and Surendra Guntur and Anirban Lahiri and Marc
                 Duranton and Clemens W{\"u}st and Phillip Christie and
                 Axel Nackaerts and Aatish Kumar",
  title =        "Balancing Programmability and Silicon Efficiency of
                 Heterogeneous Multicore Architectures",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180890",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multicore architectures provide scalable performance
                 with a lower hardware design effort than single core
                 processors. Our article presents a design methodology
                 and an embedded multicore architecture, focusing on
                 reducing the software design complexity and boosting
                 the performance density. First, we analyze
                 characteristics of the Task-Level Parallelism in modern
                 multimedia workloads. These characteristics are used to
                 formulate requirements for the programming model. Then
                 we translate the programming model requirements to an
                 architecture specification, including a novel
                 low-complexity implementation of cache coherence and a
                 hardware synchronization unit. Our evaluation
                 demonstrates that the novel coherence mechanism
                 substantially simplifies hardware design, while
                 reducing the performance by less than 18\% relative to
                 a complex snooping technique. Compared to a single
                 processor core, the multicores have already proven to
                 be more area- and energy-efficient. However, the
                 multicore architectures in embedded systems still
                 compete with highly efficient function-specific
                 hardware accelerators. In this article we identify five
                 architectural methods to boost performance density of
                 multicores; microarchitectural downscaling, asymmetric
                 multicore architectures, multithreading, generic
                 accelerators, and conjoining. Then, we present a novel
                 methodology to explore multicore design spaces,
                 including the architectural methods improving the
                 performance density. The methodology is based on a
                 complex formula computing performances of heterogeneous
                 multicore systems. Using this design space exploration
                 methodology for HD and QuadHD H.264 video decoding, we
                 estimate that the required areas of multicores in CMOS
                 45 nm are 2.5 mm$^2$ and 8.6 mm$^2$, respectively.
                 These results suggest that heterogeneous multicores are
                 cost-effective for embedded applications and can
                 provide a good programmability support.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Khajeh:2012:EAA,
  author =       "Amin Khajeh and Minyoung Kim and Nikil Dutt and Ahmed
                 M. Eltawil and Fadi J. Kurdahi",
  title =        "Error-Aware Algorithm\slash Architecture Coexploration
                 for Video Over Wireless Applications",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180892",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we propose a cross-layer
                 algorithm/architecture coexploration for wireless
                 multimedia systems to coordinate interactions among
                 sublayer optimizers for improvements in
                 energy/QoS/reliability. By exploiting the inherent
                 redundancy in wireless multimedia systems, we generate
                 an expanded design space over traditional
                 layer-specific approaches. Specifically, we control the
                 error resilient encoder at the application layer to
                 provide awareness of architectural exploration at the
                 physical layer allowing new design points with lower
                 power consumption via aggressive voltage scaling. While
                 trying to reduce energy consumption, the fault tolerant
                 technique compensates the effect of the hardware and
                 network errors due to aggressive voltage scaling and
                 lossy transmission, respectively. Our experiments on
                 H.263 video over a WCDMA communication system
                 demonstrate that coexploration enlarges the feasible
                 design space, which results in significant power
                 savings of more than 20\% in the WCDMA modem.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Salamy:2012:SOT,
  author =       "Hassan Salamy and J. Ramanujam",
  title =        "Storage Optimization through Offset Assignment with
                 Variable Coalescing",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180893",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most modern digital signal processors (DSPs) provide
                 multiple address registers and a dedicated address
                 generation unit (AGU) which performs address generation
                 in parallel to instruction execution. There is no
                 address computation overhead if the next address is
                 within the auto-modify range. A careful placement of
                 variables in memory is utilized to decrease the number
                 of address arithmetic instructions and thus to generate
                 compact and efficient code. The simple offset
                 assignment (SOA) problem concerns the layout of
                 variables for machines with one address register and
                 the general offset assignment (GOA) deals with multiple
                 address registers. Both these problems assume that each
                 variable needs to be allocated for the entire duration
                 of a program. Both SOA and GOA are NP-complete. In this
                 article, we present effective heuristics for the simple
                 and the general offset assignment problems with
                 variable coalescing where two or more non-interfering
                 variables can be mapped into the same memory location.
                 Results on several benchmarks show the significant
                 improvement of our proposed heuristics compared to
                 other heuristics in the literature.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Falk:2012:ISS,
  author =       "Heiko Falk and Peter Marwedel",
  title =        "Introduction to the {Special Section on SCOPES'09}",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180894",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2012:FLF,
  author =       "Jaegeuk Kim and Hyotaek Shim and Seon-Yeong Park and
                 Seungryoul Maeng and Jin-Soo Kim",
  title =        "{FlashLight}: a Lightweight Flash File System for
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "18:1--18:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180895",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A very promising approach for using NAND flash memory
                 as a storage medium is a flash file system. In order to
                 design a higher-performance flash file system, two
                 issues should be considered carefully. One issue is the
                 design of an efficient index structure that contains
                 the locations of both files and data in the flash
                 memory. For large-capacity storage, the index structure
                 must be stored in the flash memory to realize low
                 memory consumption; however, this may degrade the
                 system performance. The other issue is the design of a
                 novel garbage collection (GC) scheme that reclaims
                 obsolete pages. This scheme can induce considerable
                 additional read and write operations while identifying
                 and migrating valid pages. In this article, we present
                 a novel flash file system that has the following
                 features: (i) a lightweight index structure that
                 introduces the hybrid indexing scheme and intra-inode
                 index logging, and (ii) an efficient GC scheme that
                 adopts a dirty list with an on-demand GC approach as
                 well as fine-grained data separation and erase-unit
                 data allocation. We implemented FlashLight in a Linux
                 OS with kernel version 2.6.21 on an embedded device.
                 The experimental results obtained using several
                 benchmark programs confirm that FlashLight improves the
                 performance by up to 27.4\% over UBIFS by alleviating
                 index management and GC overheads by up to 33.8\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Eriksson:2012:ICG,
  author =       "Mattias Eriksson and Christoph Kessler",
  title =        "Integrated Code Generation for Loops",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "19:1--19:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180896",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Code generation in a compiler is commonly divided into
                 several phases: instruction selection, scheduling,
                 register allocation, spill code generation, and, in the
                 case of clustered architectures, cluster assignment.
                 These phases are interdependent; for instance, a
                 decision in the instruction selection phase affects how
                 an operation can be scheduled. We examine the effect of
                 this separation of phases on the quality of the
                 generated code. To study this we have formulated
                 optimal methods for code generation with integer linear
                 programming; first for acyclic code and then we extend
                 this method to modulo scheduling of loops. In our
                 experiments we compare optimal modulo scheduling, where
                 all phases are integrated, to modulo scheduling, where
                 instruction selection and cluster assignment are done
                 in a separate phase. The results show that, for an
                 architecture with two clusters, the integrated method
                 finds a better solution than the nonintegrated method
                 for 27\% of the instances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Murray:2012:ASL,
  author =       "Alastair Murray and Bj{\"o}rn Franke",
  title =        "Adaptive Source-Level Data Assignment to Dual Memory
                 Banks",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "20:1--20:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180897",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dual memory banks provide extra memory bandwidth to
                 DSP applications and enable simultaneous access to two
                 operands if the data is partitioned appropriately.
                 Fully automated and compiler integrated approaches to
                 data partitioning and memory bank assignment have,
                 however, found little acceptance by DSP software
                 developers. In this article we present a novel
                 source-level approach that is more programmer friendly.
                 Our scheme is based on soft graph coloring and highly
                 adaptive heuristics generated by genetic programming.
                 We have evaluated our scheme on an Analog Devices
                 TigerSHARC TS-101 DSP and achieved speedups of up to
                 57\% on 13 UTDSP benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Boissinot:2012:SPR,
  author =       "Benoit Boissinot and Philip Brisk and Alain Darte and
                 Fabrice Rastello",
  title =        "{SSI} Properties Revisited",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "21:1--21:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180898",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The static single information (SSI) form is an
                 extension of the static single assignment (SSA) form, a
                 well-established compiler intermediate representation
                 that has been successfully used for numerous compiler
                 analysis and optimizations. Several interesting results
                 have also been shown for SSI form concerning liveness
                 analysis and the representation of live-ranges of
                 variables, which could make SSI form appealing for
                 just-in-time compilation. Unfortunately, we have
                 uncovered several mistakes in the previous literature
                 on SSI form, which, admittedly, is already quite
                 sparse. This article corrects the mistakes that are
                 most germane to SSI form. We first explain why the two
                 definitions of SSI form proposed in past literature,
                 first by C. S. Ananian, then by J. Singer, are not
                 equivalent. Our main result is then to prove that basic
                 blocks, and thus program points, can be totally ordered
                 so that live-ranges of variables correspond to
                 intervals on a line, a result that holds for both
                 variants of SSI form. In other words, in SSI form, the
                 intersection graph defined by live-ranges is an
                 interval graph, a stronger structural property than for
                 SSA form for which the intersection graph of
                 live-ranges is chordal. Finally, we show how this
                 structure of live-ranges can be used to simplify
                 liveness analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Franke:2012:SPM,
  author =       "Bj{\"o}rn Franke",
  title =        "Statistical Performance Modeling in Functional
                 Instruction Set Simulators",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "22:1--22:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180899",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Despite the recent progress in improving the speed of
                 instruction-accurate simulators, cycle-accurate
                 simulation is still prohibitively slow for all but the
                 most basic programs. In this article we present a
                 statistical machine learning approach to performance
                 estimation in fast, instruction-accurate simulators and
                 evaluate our methodology comprehensively against three
                 popular embedded RISC processors and about 300 embedded
                 applications. We show that our methodology is capable
                 of providing accurate performance estimations with an
                 average error of less than 3.9\% while, on average,
                 operating $ \approx 14.5 $ times faster than
                 cycle-accurate simulation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chandraiah:2012:CAR,
  author =       "Pramod Chandraiah and Rainer D{\"o}mer",
  title =        "Computer-Aided Recoding to Create Structured and
                 Analyzable System Models",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "23:1--23:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180900",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In embedded system design, the quality of the input
                 model has a direct bearing on the effectiveness of the
                 system exploration and synthesis tools. Given a
                 well-written system model, tools today are effective in
                 generating working implementations. However, readily
                 available C reference code is not conducive for
                 immediate system synthesis as it lacks needed features
                 for automatic analysis and synthesis. Among others, the
                 lack of proper structure and the presence of
                 intractable pointers in the reference code are factors
                 that seriously hamper the effectiveness of system
                 design tools. To overcome these deficiencies, we aim to
                 automate the conversion of flat C code into a
                 well-structured system model by applying automated
                 source code transformations. We present a set of
                 computer-aided recoding operations that enable the
                 system designer to mitigate pointer problems and
                 quickly create the necessary structural hierarchy so
                 that the design model becomes easily analyzable and
                 synthesizable. Utilizing the designer's knowledge, our
                 interactive recoding transformations aid the designer
                 in efficiently creating well-structured system models
                 for rapid design space exploration and successful
                 synthesis. Our estimated and measured experimental
                 results show significant productivity gains through a
                 substantial reduction of the model creation time.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dubach:2012:EPE,
  author =       "Christophe Dubach and Timothy M. Jones and Michael F.
                 P. O'Boyle",
  title =        "Exploring and Predicting the Effects of
                 Microarchitectural Parameters and Compiler
                 Optimizations on Performance and Energy",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "24:1--24:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2180887.2180901",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded processor performance is dependent on both
                 the underlying architecture and the compiler
                 optimizations applied. However, designing both
                 simultaneously is extremely difficult to achieve due to
                 the time constraints designers must work under.
                 Therefore, current methodology involves designing
                 compiler and architecture in isolation, leading to
                 suboptimal performance of the final product. This
                 article develops a novel approach to this codesign
                 space problem. For our specific design space, we
                 demonstrate that we can automatically predict the
                 performance that an optimizing compiler would achieve
                 without actually tuning it for any of the
                 microarchitecture configurations considered. Once
                 trained, a single run of the program compiled with the
                 standard optimization setting is enough to make a
                 prediction on the new microarchitecture with just a
                 3.2\% error rate on average. This allows the designer
                 to accurately choose an architectural configuration
                 with knowledge of how an optimizing compiler will
                 perform on it. We use this to find the best optimizing
                 compiler/architectural configuration in our codesign
                 space and demonstrate that it achieves an average 19\%
                 performance improvement and energy savings of 16\%
                 compared to the baseline, nearly doubling the
                 energy-efficiency measured as the energy-delay-squared
                 product (EDD).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Staff:2012:APA,
  author =       "{TECS Staff}",
  title =        "Abstracts of Papers to appear in {Special Supplemental
                 Issue of TECS (v11, iSupplemental1)}",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "25:1--25:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220337",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In order to speed up the publication process, we have
                 begun to publish supplemental online-only issues. The
                 following abstracts describe the articles in the first
                 such issue, Vol. 11S(1). These articles are available
                 in the Digital Library.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2012:PPI,
  author =       "Jongeun Lee and Aviral Shrivastava",
  title =        "{PICA}: {Processor Idle Cycle Aggregation} for
                 Energy-Efficient Embedded Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "26:1--26:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220338",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Processor Idle Cycle Aggregation (PICA) is a promising
                 approach for low-power execution of processors, in
                 which small memory stalls are aggregated to create
                 large ones, enabling profitable switch of the processor
                 into low-power mode. We extend the previous approach in
                 three dimensions. First we develop static analysis for
                 the PICA technique and present optimal parameters for
                 five common types of loops based on steady-state
                 analysis. Second, to remedy the weakness of
                 software-only control in varying environment, we
                 enhance PICA with minimal hardware extension that
                 ensures correct execution for any loops and parameters,
                 thus greatly facilitating exploration-based parameter
                 tuning. Third, we demonstrate that our PICA technique
                 can be applied to certain types of nested loops with
                 variable bounds, thus enhancing the applicability of
                 PICA. We validate our analytical model against
                 simulation-based optimization and also show, through
                 our experiments on embedded application benchmarks,
                 that our technique can be applied to a wide range of
                 loops with average 20\% energy reductions, compared to
                 executions without PICA.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{McIntire:2012:EES,
  author =       "Dustin McIntire and Thanos Stathopoulos and Sasank
                 Reddy and Thomas Schmidt and William J. Kaiser",
  title =        "Energy-Efficient Sensing with the {Low Power, Energy
                 Aware Processing} ({LEAP}) Architecture",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "27:1--27:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220339",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A broad range of embedded networked sensing (ENS)
                 applications have appeared for large-scale systems,
                 introducing new requirements leading to new embedded
                 architectures, associated algorithms, and supporting
                 software systems. These new requirements include the
                 need for diverse and complex sensor systems that
                 present demands for energy and computational resources,
                 as well as for broadband communication. To satisfy
                 application demands while maintaining critical support
                 for low-energy operation, a new multiprocessor node
                 hardware and software architecture, Low Power Energy
                 Aware Processing (LEAP), has been developed. In this
                 article, we describe the LEAP design approach, in
                 which the system is able to adaptively select the most
                 energy-efficient hardware components matching an
                 application's needs. The LEAP platform supports highly
                 dynamic requirements in sensing fidelity, computational
                 load, storage media, and network bandwidth. It focuses
                 on episodic operation of each component and considers
                 the energy dissipation for each platform task by
                 integrating fine-grained energy-dissipation monitoring
                 and sophisticated power-control scheduling for all
                 subsystems, including sensors. In addition to the LEAP
                 platform's unique hardware capabilities, its software
                 architecture has been designed to provide an
                 easy-to-use power management interface and a robust,
                 fault-tolerant operating environment and to enable
                 remote upgrade of all software components. LEAP
                 platform capabilities are demonstrated by example
                 implementations, such as a network protocol design and
                 a light source event detection algorithm. Through the
                 use of a distributed node testbed, we demonstrate that
                 by exploiting high energy-efficiency components and
                 enabling proper on-demand scheduling, the LEAP
                 architecture may meet both sensing performance and
                 energy dissipation objectives for a broad class of
                 applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2012:DCR,
  author =       "Weixun Wang and Prabhat Mishra and Ann Gordon-Ross",
  title =        "Dynamic Cache Reconfiguration for Soft Real-Time
                 Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "28:1--28:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220340",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In recent years, efficient dynamic reconfiguration
                 techniques have been widely employed for system
                 optimization. Dynamic cache reconfiguration is a
                 promising approach for reducing energy consumption as
                 well as for improving overall system performance. It is
                 a major challenge to introduce cache reconfiguration
                 into real-time multitasking systems, since dynamic
                 analysis may adversely affect tasks with timing
                 constraints. This article presents a novel approach for
                 implementing cache reconfiguration in soft real-time
                 systems by efficiently leveraging static analysis
                 during runtime to minimize energy while maintaining the
                 same service level. To the best of our knowledge, this
                 is the first attempt to integrate dynamic cache
                 reconfiguration in real-time scheduling techniques. Our
                 experimental results using a wide variety of
                 applications have demonstrated that our approach can
                 significantly reduce the cache energy consumption in
                 soft real-time systems (up to 74\%).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Palermo:2012:VAR,
  author =       "Gianluca Palermo and Cristina Silvano and Vittorio
                 Zaccaria",
  title =        "A Variability-Aware Robust Design Space Exploration
                 Methodology for On-Chip Multiprocessors Subject to
                 Application-Specific Constraints",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "29:1--29:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220341",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Manufacturing process variation is dramatically
                 becoming one of the most important challenges related
                 to power and performance optimization for sub-90nm CMOS
                 technologies. Process variability impacts the
                 optimization of the target system metrics, that is,
                 performance and energy consumption by introducing
                 fluctuations and unpredictability. Besides, it impacts
                 the parametric yield of the chip with respect to
                 application level constraints by reducing the number of
                 devices working within normal operating conditions. The
                 impact of variability on systems with stringent
                 application-specific requirements (such as portable
                 multimedia and critical embedded systems) is much
                 greater than on general-purpose systems given the
                 emphasis on predictability and reduced operating
                 margins. In this market segment, failing to address
                 such a problem within the early design stages of the
                 chip may lead to missing market deadlines and suffering
                 greater economic losses. In the context of a design
                 space exploration framework for supporting the
                 platform-based design approach, we address the problem
                 of robustness with respect to manufacturing process
                 variations. First, we apply Response Surface Modeling
                 (RSM) techniques to enable an efficient evaluation of
                 the statistical measures of execution time and energy
                 consumption for each system configuration. Then, we
                 apply a robust design space exploration framework to
                 afford the problem of the impact of manufacturing
                 process variations onto the system-level metrics and
                 consequently onto the application-level constraints. We
                 finally provide a comparison of our design space
                 exploration technique with conventional approaches on
                 two different case studies.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yang:2012:UEP,
  author =       "Yoon Seok Yang and Gwan Choi",
  title =        "Unequal Error Protection Based on {DVFS} for {JSCD} in
                 Low-Power Portable Multimedia Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "30:1--30:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220342",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a low-power decoder design for
                 joint source-channel decoding (JSCD) based on a novel
                 unequal error protection (UEP) scheme over additive
                 white Gaussian noise (AWGN) channels. Conventional JSCD
                 schemes, adopting low-density parity check (LDPC) codes
                 for multimedia devices, typically operate at a
                 fixed-time decoding loop, regardless of the quality of
                 data received. We present a JSCD scheme that achieves
                 reduction in power through minimum energy decoding and
                 dynamic voltage and frequency scaling (DVFS).
                 Consequently, up to 39\% power reduction is achieved in
                 Foreman, Akiyo, and Mobile video streams without
                 performance degradation in reconstructed video
                 quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Namin:2012:EFF,
  author =       "Ashkan Hosseinzadeh Namin and Huapeng Wu and Majid
                 Ahmadi",
  title =        "An Efficient Finite Field Multiplier Using Redundant
                 Representation",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "31:1--31:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220343",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "An efficient word-level finite field multiplier using
                 redundant representation is proposed. The proposed
                 multiplier has a significantly higher speed, compared
                 to previously proposed word-level architectures using
                 either redundant representation or optimal normal basis
                 type I, at the expense of moderately higher area
                 complexity. Furthermore, the new design out-performs
                 other similar proposals when considering the product of
                 area and delay as a measure of performance. ASIC
                 Realization of the proposed design using TSMC's 0.18 $
                 \mu $ m CMOS technology for the binary field size of
                 163 is also presented.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Leyva-del-Foyo:2012:ITI,
  author =       "Luis E. Leyva-del-Foyo and Pedro Mejia-Alvarez and
                 Dionisio de Niz",
  title =        "Integrated Task and Interrupt Management for Real-Time
                 Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "32:1--32:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220344",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Real-time scheduling algorithms like RMA or EDF and
                 their corresponding schedulability test have proven to
                 be powerful tools for developing predictable real-time
                 systems. However, the traditional interrupt management
                 model presents multiple inconsistencies that break the
                 assumptions of many of the real-time scheduling tests,
                 diminishing its utility. In this article, we analyze
                 these inconsistencies and present a model that resolves
                 them by integrating interrupts and tasks in a single
                 scheduling model. We then use the RMA theory to
                 calculate the cost of the model and analyze the
                 circumstances under which it can provide the most
                 value. This model was implemented in a kernel module.
                 The portability of the design of our module is
                 discussed in terms of its independence from both the
                 hardware and the kernel. We also discuss the
                 implementation issues of the model over conventional PC
                 hardware, along with its cost and novel optimizations
                 for reducing the overhead. Finally, we present our
                 experimental evaluation to show evidence of its
                 temporal determinism and overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Garg:2012:IMP,
  author =       "Siddharth Garg and Diana Marculescu",
  title =        "On the Impact of Manufacturing Process Variations on
                 the Lifetime of Sensor Networks",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "33:1--33:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220345",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The lifetime of individual nodes in a sensor network
                 depends strongly on the leakage power of the nodes in
                 idle state. With technology scaling, variability in
                 leakage power dissipation of sensor nodes will cause
                 increased variability in their lifetimes. In this
                 article, we analyze how the lifetime variations of
                 sensor nodes affect the performance of the sensor
                 network as a whole. We demonstrate the use of the
                 proposed framework to explore deployment cost versus
                 performance trade-offs for sensor networks. Results
                 indicate that up to 37\% improvement in the critical
                 lifetime of a sensor network can be obtained with a
                 20\% increase in deployment cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Blech:2012:GIB,
  author =       "Jan Olaf Blech and Micha{\"e}l P{\'e}rin",
  title =        "Generating Invariant-Based Certificates for Embedded
                 Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "34:1--34:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220346",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Automatic verification tools, such as model checkers
                 and tools based on static analysis or on abstract
                 interpretation, have become popular in software and
                 hardware development. They increase confidence and
                 potentially provide rich feedback. However, with
                 increasing complexity, verification tools themselves
                 are more likely to contain errors. In contrast to
                 automatic verification tools, higher-order theorem
                 provers use mathematically founded proof strategies
                 checked by a small proof checker to guarantee selected
                 properties. Thus, they enjoy a high level of
                 trustability. Properties of software and hardware
                 systems and their justifications can be encapsulated
                 into a certificate, thereby guaranteeing correctness of
                 the systems, with respect to the properties. These
                 results offer a much higher degree of confidence than
                 results achieved by verification tools. However,
                 higher-order theorem provers are usually slow, due to
                 their general and minimalistic nature. Even for small
                 systems, a lot of human interaction is required for
                 establishing a certificate. In this work, we combine
                 the advantages of automatic verification tools (i.e.,
                 speed and automation) with those of higher-order
                 theorem provers (i.e., high level of trustability). The
                 verification tool generates a certificate for each
                 invocation. This is checked by the higher-order theorem
                 prover, thereby guaranteeing the desired property. The
                 generation of certificates is much easier than
                 producing the analysis results of the verification tool
                 in the first place. In our work, we are able to create
                 certificates that come with an algorithmic description
                 of the proof of the desired property as justification.
                 We concentrate on verification tools that generate
                 invariants of systems and certify automatically that
                 these do indeed hold. Our approach is applied to the
                 certification of the verdicts of a deadlock-detection
                 tool for an asynchronous component-based language.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jeong:2012:PLT,
  author =       "Jaein Jeong and David Culler",
  title =        "Predicting the Long-Term Behavior of a Micro-Solar
                 Power System",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "35:1--35:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220347",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Micro-solar power system design is challenging because
                 it must address long-term system behavior under highly
                 variable solar energy conditions and consider a large
                 space of design options. Several micro-solar power
                 systems and models have been made, validating
                 particular points in the whole design space. We provide
                 a general architecture of micro-solar power
                 systems---comprising key components and
                 interconnections among the components---and formalize
                 each component in an analytical or empirical model of
                 its behavior. To model the variability of solar energy,
                 we provide three solar radiation models, depending on
                 the degree of information available: an astronomical
                 model for ideal conditions, an obstructed astronomical
                 model for estimating solar radiation under the presence
                 of shadows and obstructions, and a weather-effect model
                 for estimating solar radiation under weather variation.
                 Our solar radiation models are validated with a
                 concrete design, the HydroWatch node, thus achieving
                 small deviation from the long-term measurement. They
                 can be used in combination with other micro-solar
                 system models to improve the utility of the load and
                 estimate the behavior of micro-solar power systems more
                 accurately. Thus, our solar radiation models provide
                 more accurate estimations of solar radiation and close
                 the loop for micro-solar power system modeling.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Smith:2012:OSH,
  author =       "Melissa C. Smith and Gregory D. Peterson",
  title =        "Optimization of Shared High-Performance Reconfigurable
                 Computing Resources",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "36:1--36:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220348",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In the field of high-performance computing, systems
                 harboring reconfigurable devices, such as
                 field-programmable gate arrays (FPGAs), are gaining
                 more widespread interest. Such systems range from
                 supercomputers with tightly coupled reconfigurable
                 hardware to clusters with reconfigurable devices at
                 each node. The use of these architectures for
                 scientific computing provides an alternative for
                 computationally demanding problems and has advantages
                 in metrics, such as operating cost/performance and
                 power/performance. However, performance optimization of
                 these systems can be challenging even with knowledge of
                 the system's characteristics. Our analytic performance
                 model includes parameters representing the
                 reconfigurable hardware, application load imbalance
                 across the nodes, background user load, basic
                 message-passing communication, and processor
                 heterogeneity. In this article, we provide an overview
                 of the analytical model and demonstrate its application
                 for optimization and scheduling of high-performance
                 reconfigurable computing (HPRC) resources. We examine
                 cost functions for minimum runtime and other
                 optimization problems commonly found in shared
                 computing resources. Finally, we discuss additional
                 scheduling issues and other potential applications of
                 the model.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2012:EEA,
  author =       "Kyoungwoo Lee and Nikil Dutt and Nalini
                 Venkatasubramanian",
  title =        "{EAVE}: {Error-Aware Video Encoding} Supporting
                 Extended Energy\slash {QoS} Trade-offs for Mobile
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "37:1--37:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220349",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy/QoS provisioning is challenging for video
                 applications over lossy wireless network with
                 power-constrained mobile handheld devices. In this
                 work, we exploit the inherent error tolerance of video
                 data to generate a range of acceptable operating points
                 by controlling the amount of errors in the system. In
                 particular, we propose an error-aware video encoding
                 technique, EAVE, that intentionally injects errors
                 while ensuring acceptable QoS. The expanded trade-off
                 space generated by EAVE allows system designers to
                 comparatively evaluate different operating points with
                 varying QoS and energy consumption by aggressively
                 exploiting error-resilience attributes, and could
                 potentially result in significant energy savings. The
                 novelty of our approach resides in active exploitation
                 of errors to vary the operating conditions for further
                 optimization of system parameters. Moreover, we present
                 the adaptivity of our approach by incorporating the
                 feedback from the decoding side to achieve the QoS
                 requirement under the dynamic network status. Our
                 experiments show that EAVE can reduce the energy
                 consumption for an encoding device by up to 37\% for a
                 video conferencing application over a wireless network
                 without quality degradation, compared to a standard
                 video encoding technique over test video streams.
                 Further, our experimental results demonstrate that EAVE
                 can expand the design space by 14 times with respect to
                 energy consumption and by 13 times with respect to
                 video quality (compared to a traditional approach
                 without active error exploitation) on average, over
                 test video streams.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2012:ART,
  author =       "Mingsong Chen and Prabhat Mishra and Dhrubajyoti
                 Kalita",
  title =        "Automatic {RTL} Test Generation from {SystemC TLM}
                 Specifications",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "38:1--38:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2220336.2220350",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "SystemC transaction-level modeling (TLM) is widely
                 used to enable early exploration for both hardware and
                 software designs. It can reduce the overall design and
                 validation effort of complex system-on-chip (SOC)
                 architectures. However, due to lack of automated
                 techniques coupled with limited reuse of validation
                 efforts between abstraction levels, SOC validation is
                 becoming a major bottleneck. This article presents a
                 novel top-down methodology for automatically generating
                 register transfer-level (RTL) tests from SystemC TLM
                 specifications. It makes two important contributions:
                 (i) it proposes a method that can automatically
                 generate TLM tests using various coverage metrics, and
                 (ii) it develops a test refinement specification for
                 automatically converting TLM tests to RTL tests in
                 order to reduce overall validation effort. We have
                 developed a tool which incorporates these activities to
                 enable automated RTL test generation from SystemC TLM
                 specifications. Case studies using a router example and
                 a 64-bit Alpha AXP pipelined processor demonstrate that
                 our approach can achieve intended functional coverage
                 of the RTL designs, as well as capture various
                 functional errors and inconsistencies between
                 specifications and implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Plaks:2012:ESS,
  author =       "Toomas P. Plaks",
  title =        "Editorial: Special Section on {CAPA'09}",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "39:1--39:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331148",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Paul:2012:PRC,
  author =       "Anand Paul and Yung-Chuan Jiang and Jhing-Fa Wang and
                 Jar-Ferr Yang",
  title =        "Parallel Reconfigurable Computing-Based Mapping
                 Algorithm for Motion Estimation in Advanced Video
                 Coding",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "40:1--40:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331149",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Computational load of motion estimation in advanced
                 video coding (AVC) standard is significantly high and
                 even worse for HDTV and super-resolution sequences. In
                 this article, a video processing algorithm is
                 dynamically mapped onto a new parallel reconfigurable
                 computing (PRC) architecture which consists of multiple
                 dynamic reconfigurable computing (DRC) units. First, we
                 construct a directed acyclic graph (DAG) to represent
                 video coding algorithms in which motion estimation is
                 the focus. A novel parallel partition approach is then
                 proposed to map motion estimation DAG onto the multiple
                 DRC units in a PRC system. This partitioning algorithm
                 is capable of design optimization of parallel
                 processing reconfigurable systems for a given number of
                 processing elements in different search ranges. This
                 speeds up the video processing with minimum
                 sacrifice.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Suris:2012:RSC,
  author =       "Jorge A. Sur{\'\i}s and Adolfo Recio and Peter
                 Athanas",
  title =        "{RapidRadio}: Signal Classification and Radio
                 Deployment Framework",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "41:1--41:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331151",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, the RapidRadio framework for signal
                 classification and receiver deployment is discussed.
                 The framework is a productivity-enhancing tool that
                 reduces the required knowledge base for implementing a
                 receiver on an FPGA-based SDR platform. The ultimate
                 objective of this framework is to identify unknown
                 signals and to build FPGA-based receivers capable of
                 receiving them. RapidRadio divides the process of radio
                 creation into two phases: the analysis phase and radio
                 synthesis phase. The analysis phase guides the user
                 through the process of classifying an unknown signal
                 and determining its modulation scheme and parameters,
                 resulting in a radio receiver model. In the second
                 phase, this model is transformed into a functional
                 receiver in an FPGA-based platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mark:2012:HBC,
  author =       "Cindy Mark and Scott Y. L. Chin and Lesley Shannon and
                 Steven J. E. Wilton",
  title =        "Hierarchical Benchmark Circuit Generation for {FPGA}
                 Architecture Evaluation",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "42:1--42:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331152",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We describe a stochastic circuit generator that can be
                 used to automatically create benchmark circuits for use
                 in FPGA architecture studies. The circuits consist of a
                 hierarchy of interconnected modules, reflecting the
                 structure of circuits designed using a system-on-chip
                 design flow. Within each level of hierarchy, modules
                 can be connected in a bus, star, or dataflow
                 configuration. Our circuit generator is calibrated
                 based on a careful study of existing system-on-chip
                 circuits. We show that our benchmark circuits lead to
                 more realistic architectural conclusions than circuits
                 generated using previous generators.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Reardon:2012:REE,
  author =       "Casey Reardon and Brian Holland and Alan D. George and
                 Greg Stitt and Herman Lam",
  title =        "{RCML}: An Environment for Estimation Modeling of
                 Reconfigurable Computing Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "43:1--43:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331153",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Reconfigurable computing (RC) is emerging as a
                 promising area for embedded computing, in which complex
                 systems must balance performance, flexibility, cost,
                 and power. The difficulty associated with RC
                 development suggests improved strategic planning and
                 analysis techniques can save significant development
                 time and effort. This article presents a new abstract
                 modeling language and environment, the RC Modeling
                 Language (RCML), to facilitate efficient design space
                 exploration of RC systems at the estimation modeling
                 level, that is, before building a functional
                 implementation. Two integrated analysis tools and case
                 studies, one analytical and one simulative, are
                 presented illustrating relatively accurate automated
                 analysis of systems modeled in RCML.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{DiBiagio:2012:AOA,
  author =       "Andrea {Di Biagio} and Giovanni Agosta and Martino
                 Sykora and Cristina Silvano",
  title =        "Architecture Optimization of Application-Specific
                 Implicit Instructions",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "44:1--44:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331154",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dynamic configuration of application-specific implicit
                 instructions has been proposed to better exploit the
                 available parallelism at the instruction level in
                 pipelined processors. The support of such implicit
                 instruction issue requires the pipeline to be extended
                 with a trigger table that describes the instruction
                 implicitly issued as a response to a value written into
                 a triggering register by a triggering instruction
                 (which may be an add or sub instruction). In this
                 article, we explore the design optimization of the
                 trigger table to maximize the number of instructions
                 that can be implicitly issued while keeping the limited
                 size of the trigger table. The concept of implicitly
                 issued instruction has been formally defined by
                 considering the inter-basic block analysis of control
                 and data dependencies. A compilation tool chain has
                 been developed to automatically identify the
                 optimization opportunities, taking into account the
                 constraints imposed by control and data dependencies as
                 well as by architectural limitations. The proposed
                 solutions have been applied to the case of a baseline
                 scalar MIPS processor where, for the selected set of
                 benchmarks (DSPStone and Mibench/automotive), we
                 obtained an average speedup of 17\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Napapetian:2012:ESS,
  author =       "Ani Nahapetian and William Kaiser and Majid
                 Sarrafzadeh",
  title =        "Editorial: Special Section on {WHS'09}",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "45:1--45:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331155",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Guenterberg:2012:ASR,
  author =       "Eric Guenterberg and Hassan Ghasemzadeh and Roozbeh
                 Jafari",
  title =        "Automatic Segmentation and Recognition in Body Sensor
                 Networks Using a Hidden {Markov} Model",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "46:1--46:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331156",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "One important application of body sensor networks is
                 action recognition. Action recognition often implicitly
                 requires partitioning sensor data into intervals, then
                 labeling the partitions according to the action that
                 each represents or as a non-action. The temporal
                 partitioning stage is called segmentation, and the
                 labeling is called classification. While many effective
                 methods exist for classification, segmentation remains
                 problematic. We present a technique inspired by
                 continuous speech recognition that combines
                 segmentation and classification using hidden Markov
                 models. This technique is distributed across several
                 sensor nodes. We show the results of this technique and
                 the bandwidth savings over full data transmission.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pradhan:2012:AVJ,
  author =       "Gaurav N. Pradhan and B. Prabhakaran",
  title =        "Analyzing and Visualizing Jump Performance Using
                 Wireless Body Sensors",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "47:1--47:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331157",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Advancement in technology has led to the deployment of
                 body sensor networks (BSN) to monitor and sense human
                 activity in pervasive environments. Using multiple
                 wireless on-body systems, such as physiological data
                 monitoring and motion capture systems, body sensor
                 network data consists of heterogeneous physiologic and
                 motoric streams that form a multidimensional framework.
                 In this article, we analyze such high-dimensional body
                 sensor network data by proposing an efficient,
                 multidimensional factor analysis technique for
                 quantifying human performance and, at the same time,
                 providing visualization for performances of
                 participants in a low-dimensional space for easier
                 interpretation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Thatte:2012:KEE,
  author =       "Gautam Thatte and Ming Li and Sangwon Lee and Adar
                 Emken and Shrikanth Narayanan and Urbashi Mitra and
                 Donna Spruijt-Metz and Murali Annavaram",
  title =        "{KNOWME}: An Energy-Efficient Multimodal Body Area
                 Network for Physical Activity Monitoring",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "48:1--48:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331158",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The use of biometric sensors for monitoring an
                 individual's health and related behaviors, continuously
                 and in real time, promises to revolutionize healthcare
                 in the near future. In an effort to better understand
                 the complex interplay between one's medical condition
                 and social, environmental, and metabolic parameters,
                 this article presents the KNOWME platform, a complete,
                 end-to-end, body area sensing system that integrates
                 off-the-shelf biometric sensors with a Nokia N95 mobile
                 phone to continuously monitor the metabolic signals of
                 a subject. With a current focus on pediatric obesity,
                 KNOWME employs metabolic signals to monitor and
                 evaluate physical activity. KNOWME development and
                 in-lab deployment studies have revealed three major
                 challenges: (1) the need for robustness to highly
                 varying operating environments due to subject-induced
                 variability, such as mobility or sensor placement; (2)
                 balancing the tension between achieving high fidelity
                 data collection and minimizing network energy
                 consumption; and (3) accurate physical activity
                 detection using a modest number of sensors. The KNOWME
                 platform described herein directly addresses these
                 three challenges. Design robustness is achieved by
                 creating a three-tiered sensor data collection
                 architecture. The system architecture is designed to
                 provide robust, continuous, multichannel data
                 collection and scales without compromising normal
                 mobile device operation. Novel physical activity
                 detection methods which exploit new representations of
                 sensor signals provide accurate and efficient physical
                 activity detection. The physical activity detection
                 method employs personalized training phases and
                 accounts for intersession variability. Finally,
                 exploiting the features of the hardware implementation,
                 a low-complexity sensor sampling algorithm is
                 developed, resulting in significant energy savings
                 without loss of performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Banerjee:2012:BAT,
  author =       "Ayan Banerjee and Sailesh Kandula and Tridib Mukherjee
                 and Sandeep K. S. Gupta",
  title =        "{BAND-AiDe}: a Tool for Cyber-Physical Oriented
                 Analysis and Design of Body Area Networks and Devices",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "49:1--49:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331159",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Body area networks (BANs) are networks of medical
                 devices implanted within or worn on the human body.
                 Analysis and verification of BAN designs require (i)
                 early feedback on the BAN design and (ii)
                 high-confidence evaluation of BANs without requiring
                 any hazardous, intrusive, and costly deployment. Any
                 design of BAN further has to ensure (i) the safety of
                 the human body, that is, limiting any undesirable
                 side-effects (e.g., heat dissipation) of BAN operations
                 (involving sensing, computation, and communication
                 among the devices) on the human body, and (ii) the
                 sustainability of the BAN operations, that is, the
                 continuation of the operations under constrained
                 resources (e.g., limited battery power in the devices)
                 without requiring any redeployments. This article uses
                 the Model Based Engineering (MBE) approach to perform
                 design and analysis of BANs. In this regard, first, an
                 abstract cyber-physical model of BANs, called BAN-CPS,
                 is proposed that captures the undesirable side-effects
                 of the medical devices (cyber) on the human body
                 (physical); second, a design and analysis tool, named
                 BAND-AiDe, is developed that allows specification of
                 BAN-CPS using industry standard Abstract Architecture
                 Description Language (AADL) and enables safety and
                 sustainability analysis of BANs; and third, the
                 applicability of BAND-AiDe is shown through a case
                 study using both single and a network of medical
                 devices for health monitoring applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hanson:2012:AFE,
  author =       "Mark A. Hanson and Harry C. {Powell, Jr.} and Adam T.
                 Barth and John Lach",
  title =        "Application-Focused Energy-Fidelity Scalability for
                 Wireless Motion-Based Health Assessment",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "50:1--50:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331160",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy-fidelity trade-offs are central to the
                 performance of many technologies, but they are
                 essential in wireless body area sensor networks (BASNs)
                 due to severe energy and processing constraints and the
                 critical nature of certain healthcare applications.
                 On-node signal processing and compression techniques
                 can save energy by greatly reducing the amount of data
                 transmitted over the wireless channel, but lossy
                 techniques, capable of high compression ratios, can
                 incur a reduction in application fidelity. In order to
                 maximize system performance, these trade-offs must be
                 considered at runtime due to the dynamic nature of BASN
                 applications, including sensed data, operating
                 environments, user actuation, etc. BASNs therefore
                 require energy-fidelity scalability, so automated and
                 user-initiated trade-offs can be made dynamically. This
                 article presents a data rate scalability framework
                 within a motion-based health application context which
                 demonstrates the design of efficient and efficacious
                 wireless health systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Boulis:2012:IWC,
  author =       "Athanassios Boulis and Yuriy Tselishchev and Lavy
                 Libman and David Smith and Leif Hanlen",
  title =        "Impact of Wireless Channel Temporal Variation on {MAC}
                 Design for Body Area Networks",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "51:1--51:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331161",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We investigate the impact of wireless channel temporal
                 variations on the design of medium access control (MAC)
                 protocols for body area networks (BANs). Our
                 measurements-based channel model captures large and
                 small time-scale signal correlations, giving an
                 accurate picture of the signal variation, specifically,
                 the deep fades which are the features that mostly
                 affect the behavior of the MAC. We test the effect of
                 the channel model on the performance of the 802.15.4
                 MAC both in contention access mode and TDMA access
                 mode. We show that there are considerable differences
                 in the performance of the MAC compared to simulations
                 that do not model channel temporal variation.
                 Furthermore, explaining the behavior of the MAC under a
                 temporal varying channel, we can suggest specific
                 design choices for the emerging BAN MAC standard.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Fainekos:2012:ESS,
  author =       "Georgios Fainekos and Eric Goubault and Franjo
                 Ivan{\v{c}}i{\'c} and Sriram Sankaranarayanan",
  title =        "Editorial: Special Section {VCPSS'09}",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "52:1--52:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331162",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "52",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wongpiromsarn:2012:VPC,
  author =       "Tichakorn Wongpiromsarn and Sayan Mitra and Andrew
                 Lamperski and Richard M. Murray",
  title =        "Verification of Periodically Controlled Hybrid
                 Systems: Application to an Autonomous Vehicle",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "53:1--53:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331163",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article introduces Periodically Controlled Hybrid
                 Automata (PCHA) for modular specification of embedded
                 control systems. In a PCHA, control actions that change
                 the control input to the plant occur roughly
                 periodically, while other actions that update the state
                 of the controller may occur in the interim. Such
                 actions could model, for example, sensor updates and
                 information received from higher-level planning modules
                 that change the set point of the controller. Based on
                 periodicity and subtangential conditions, a new
                 sufficient condition for verifying invariant properties
                 of PCHAs is presented. For PCHAs with polynomial
                 continuous vector fields, it is possible to check these
                 conditions automatically using, for example, quantifier
                 elimination or sum of squares decomposition. We examine
                 the feasibility of this automatic approach on a small
                 example. The proposed technique is also used to
                 manually verify safety and progress properties of a
                 fairly complex planner-controller subsystem of an
                 autonomous ground vehicle. Geometric properties of
                 planner-generated paths are derived which guarantee
                 that such paths can be safely followed by the
                 controller.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "53",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Girard:2012:VSL,
  author =       "Antoine Girard and Gang Zheng",
  title =        "Verification of Safety and Liveness Properties of
                 Metric Transition Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "54:1--54:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331164",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We consider verification problems for transition
                 systems enriched with a metric structure. We believe
                 that these metric transition systems are particularly
                 suitable for the analysis of cyber-physical systems in
                 which metrics can be naturally defined on the numerical
                 variables of the embedded software and on the
                 continuous states of the physical environment. We
                 consider verification of bounded and unbounded safety
                 properties, as well as bounded liveness properties. The
                 transition systems we consider are nondeterministic,
                 finitely branching, and with a finite set of initial
                 states. Therefore, bounded safety/liveness properties
                 can always be verified by exhaustive exploration of the
                 system trajectories. However, this approach may be
                 intractable in practice, as the number of trajectories
                 usually grows exponentially with respect to the
                 considered bound. Furthermore, since the system we
                 consider can have an infinite set of states, exhaustive
                 exploration cannot be used for unbounded safety
                 verification. For bounded safety properties, we propose
                 an algorithm which combines exploration of the system
                 trajectories and state space reduction using merging
                 based on a bisimulation metric. The main novelty
                 compared to an algorithm presented recently by Lerda et
                 al. [2008] consists in introducing a tuning parameter
                 that improves the performance drastically. We also
                 establish a procedure that allows us to prove unbounded
                 safety from the result of the bounded safety algorithm
                 via a refinement step. We then adapt the algorithm to
                 handle bounded liveness verification. Finally, the
                 effectiveness of the approach is demonstrated by
                 applying it to the analysis of implementations of an
                 embedded control loop.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seshia:2012:QAS,
  author =       "Sanjit A. Seshia and Alexander Rakhlin",
  title =        "Quantitative Analysis of Systems Using Game-Theoretic
                 Learning",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "55:1--55:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331165",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The analysis of quantitative properties, such as
                 timing and power, is central to the design of reliable
                 embedded software and systems. However, the
                 verification of such properties on a program is made
                 difficult by their heavy dependence on the program's
                 environment, such as the processor it runs on. Modeling
                 the environment by hand can be tedious, error prone,
                 and time consuming. In this article, we present a new
                 game-theoretic approach to analyzing quantitative
                 properties that is based on performing systematic
                 measurements to automatically learn a model of the
                 environment. We model the problem as a game between our
                 algorithm (player) and the environment of the program
                 (adversary) in which the player seeks to accurately
                 predict the property of interest, while the adversary
                 sets environment states and parameters. To solve this
                 problem, we employ a randomized strategy that
                 repeatedly tests the program along a linear-sized set
                 of program paths called basis paths, using the
                 resulting measurements to infer a weighted-graph model
                 of the environment from which quantitative properties
                 can be predicted. Test cases are automatically
                 generated using satisfiability modulo theories (SMT)
                 solving. We prove that our algorithm can, under certain
                 assumptions and with arbitrarily high probability,
                 accurately predict properties such as worst-case
                 execution time or estimate the distribution of
                 execution times. Experimental results for execution
                 time analysis demonstrate that our approach is
                 efficient, accurate, and highly portable.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2012:MCB,
  author =       "Lan Wu and Wei Zhang",
  title =        "A Model Checking Based Approach to Bounding Worst-Case
                 Execution Time for Multicore Processors",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "56:1--56:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331166",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As multicore processors are increasingly adopted in
                 industry, it has become a great challenge to accurately
                 bound the worst-case execution time (WCET) for
                 real-time systems running on multicore chips. This is
                 particularly true because of the inter-thread
                 interferences in accessing shared resources on
                 multicores, such as shared L2 caches, which can
                 significantly affect the performance but are very
                 difficult to be estimated statically. This article
                 proposes an approach to analyzing WCET for multicore
                 processors with shared L2 instruction caches by using a
                 model checking based method. We model each concurrent
                 real-time thread, including the inter-thread cache
                 interferences with a PROMELA process, and derive the
                 WCET by using a binary search algorithm. To reduce the
                 state explosion problem, we propose several techniques
                 for reducing the memory consumption by exploiting
                 domain-specific information. Our experiments indicate
                 that compared to the static analysis technique based on
                 extended ILP (integer linear programming), our approach
                 improves the tightness of WCET estimation by more than
                 31.1\% for the benchmarks we studied. However, due to
                 the inherent complexity of multicore timing analysis
                 and the state explosion problem, the model checking
                 based approach currently can only work with small
                 real-time kernels for dual-core processors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tang:2012:UMS,
  author =       "Qinghui Tang and Sandeep K. S. Gupta and Georgios
                 Varsamopoulos",
  title =        "A Unified Methodology for Scheduling in Distributed
                 Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "57:1--57:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331167",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A distributed cyber-physical system (DCPS) may receive
                 and induce energy-based interference to and from its
                 environment. This article presents a model and an
                 associated methodology that can be used to (i) schedule
                 tasks in DCPSs to ensure that the thermal effects of
                 the task execution are within acceptable levels, and
                 (ii) verify that a given schedule meets the
                 constraints. The model uses coarse discretization of
                 space and linearity of interference. The methodology
                 involves characterizing the interference of the task
                 execution and fitting it into the model, then using the
                 fitted model to verify a solution or explore the
                 solution space.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nghiem:2012:TTI,
  author =       "Truong Nghiem and George J. Pappas and Rajeev Alur and
                 Antoine Girard",
  title =        "Time-Triggered Implementations of Dynamic
                 Controllers",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "58:1--58:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2331147.2331168",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Bridging the gap between model-based design and
                 platform-based implementation is one of the critical
                 challenges for embedded software systems. In the
                 context of embedded control systems that interact with
                 an environment, a variety of errors due to
                 quantization, delays, and scheduling policies may
                 generate executable code that does not faithfully
                 implement the model-based design. In this article, we
                 show that the performance gap between the model-level
                 semantics of linear dynamic controllers, for example,
                 the proportional-integral-derivative (PID) controllers
                 and their implementation-level semantics, can be
                 rigorously quantified if the controller implementation
                 is executed on a predictable time-triggered
                 architecture. Our technical approach uses lifting
                 techniques for periodic time-varying linear systems in
                 order to compute the exact error between the model
                 semantics and the execution semantics. Explicitly
                 computing the impact of the implementation on overall
                 system performance allows us to compare and partially
                 order different implementations with various scheduling
                 or timing characteristics.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dong:2012:UAS,
  author =       {Qi Dong and Donggang Liu},
  title =        {Using Auxiliary Sensors for Pairwise Key Establishment
                 in {WSN}},
  journal =      j-TECS,
  volume =       {11},
  number =       {3},
  pages =        {59:1--59:??},
  month =        sep,
  year =         {2012},
  coden =        {????},
  doi =          {https://doi.org/10.1145/2345770.2345771},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Oct 22 10:44:19 MDT 2012},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Many techniques have been developed recently for
                 establishing pairwise keys in sensor networks. However,
                 some of them are vulnerable to a few compromised sensor
                 nodes, while others could involve expensive protocols
                 for establishing keys. This article introduces a much
                 better alternative that can achieve both high
                 resilience to node compromises and high efficiency in
                 key establishment. The main idea is to deploy a small
                 number of additional sensor nodes, called assisting
                 nodes, to help key establishment between sensor nodes.
                 The proposed approach has many advantages over existing
                 approaches. In particular, a sensor node only needs to
                 make a few local communications and perform a few
                 efficient hash operations to setup a key with any other
                 sensor node in the network at a very high probability.
                 The majority of sensor nodes only need to store a
                 single key. Besides, it also provides high resilience
                 to node compromises. The theoretical analysis,
                 simulation studies, and experiments on TelosB sensor
                 motes also demonstrate the advantages of this key
                 establishment protocol in sensor networks.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {59},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@Article{Arora:2012:ILM,
  author          = {Divya Arora and Najwa Aaraj and Anand Raghunathan and
                     Niraj K. Jha},
  title           = {{INVISIOS}: a Lightweight, Minimally Intrusive Secure
                     Execution Environment},
  journal         = j-TECS,
  volume          = {11},
  number          = {3},
  pages           = {60:1--60:??},
  month           = sep,
  year            = {2012},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2345770.2345772},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Many information security attacks exploit
                     vulnerabilities in ``trusted'' and privileged software
                     executing on the system, such as the operating system
                     (OS). On the other hand, most security mechanisms
                     provide no immunity to security-critical user
                     applications if vulnerabilities are present in the
                     underlying OS. While technologies have been proposed
                     that facilitate isolation of security-critical
                     software, they require either significant computational
                     resources and are hence not applicable to many
                     resource-constrained embedded systems, or necessitate
                     extensive redesign of the underlying processors and
                     hardware. In this work, we propose INVISIOS: a
                     lightweight, minimally intrusive hardware-software
                     architecture to make the execution of security-critical
                     software invisible to the OS, and hence protected from
                     its vulnerabilities. The INVISIOS software architecture
                     encapsulates the security-critical software into a
                     self-contained software module. While this module is
                     part of the kernel and is run with kernel-level
                     privileges, its code, data, and execution are
                     transparent to and protected from the rest of the
                     kernel. The INVISIOS hardware architecture consists of
                     simple add-on hardware components that are responsible
                     for bootstrapping the secure core, ensuring that it is
                     exercised by applications in only permitted ways, and
                     enforcing the isolation of its code and data. We
                     implemented INVISIOS by enhancing a full-system
                     emulator and Linux to model the proposed software and
                     hardware enhancements, and applied it to protect a
                     commercial cryptographic library. Our experiments
                     demonstrate that INVISIOS is capable of facilitating
                     secure execution at very small overheads, making it
                     suitable for resource-constrained embedded systems and
                     systems-on-chip.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {60},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Izosimov:2012:SOF,
  author          = {Viacheslav Izosimov and Paul Pop and Petru Eles and
                     Zebo Peng},
  title           = {Scheduling and Optimization of Fault-Tolerant Embedded
                     Systems with Transparency\slash Performance
                     Trade-Offs},
  journal         = j-TECS,
  volume          = {11},
  number          = {3},
  pages           = {61:1--61:??},
  month           = sep,
  year            = {2012},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2345770.2345773},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we propose a strategy for the
                     synthesis of fault-tolerant schedules and for the
                     mapping of fault-tolerant applications. Our techniques
                     handle transparency/performance trade-offs and use the
                     fault-occurrence information to reduce the overhead due
                     to fault tolerance. Processes and messages are
                     statically scheduled, and we use process reexecution
                     for recovering from multiple transient faults. We
                     propose a fine-grained transparent recovery, where the
                     property of transparency can be selectively applied to
                     processes and messages. Transparency hides the recovery
                     actions in a selected part of the application so that
                     they do not affect the schedule of other processes and
                     messages. While leading to longer schedules,
                     transparent recovery has the advantage of both improved
                     debuggability and less memory needed to store the
                     fault-tolerant schedules.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {61},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Yang:2012:PAA,
  author          = {Shengqi Yang and Pallav Gupta and Marilyn Wolf and
                     Dimitrios Serpanos and Vijaykrishnan Narayanan and Yuan
                     Xie},
  title           = {Power Analysis Attack Resistance Engineering by
                     Dynamic Voltage and Frequency Scaling},
  journal         = j-TECS,
  volume          = {11},
  number          = {3},
  pages           = {62:1--62:??},
  month           = sep,
  year            = {2012},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2345770.2345774},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article proposes a novel approach to cryptosystem
                     design to prevent power analysis attacks. Such attacks
                     infer program behavior by continuously monitoring the
                     power supply current going into the processor core.
                     They form an important class of security attacks. Our
                     approach is based on dynamic voltage and frequency
                     scaling (DVFS), which hides processor state to make it
                     harder for an attacker to gain access to a secure
                     system. Three designs are studied to test the efficacy
                     of the DVFS method against power analysis attacks. The
                     advanced realization of our cryptosystem is presented
                     which achieves enough high power and time trace
                     entropies to block various kinds of power analysis
                     attacks in the DES algorithm. We observed 27\% energy
                     reduction and 16\% time overhead in these algorithms.
                     Finally, DVFS hardness analysis is presented.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {62},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Shokry:2012:HSS,
  author          = {Hesham Shokry and Hatem M. El-Boghdadi},
  title           = {On Heuristic Solutions to the Simple Offset Assignment
                     Problem in Address-Code Optimization},
  journal         = j-TECS,
  volume          = {11},
  number          = {3},
  pages           = {63:1--63:??},
  month           = sep,
  year            = {2012},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2345770.2345775},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The increasing demand for more functionality in
                     embedded systems applications nowadays requires
                     efficient generation of compact code for embedded DSP
                     processors. Because such processors have highly
                     irregular data-paths, compilers targeting those
                     processors are challenged with the automatic generation
                     of optimized code with competent quality comparable to
                     hand-crafted code. A major issue in code-generation is
                     to optimize the placement of program variables in ROM
                     relative to each other so as to reduce the overhead
                     instructions dedicated for address computations. Modern
                     DSP processors are typically shipped with a feature
                     called Address Generation Unit (AGU) that provides
                     efficient address-generation instructions for accessing
                     program variables. Compilers targeting those processors
                     are expected to exploit the AGU to optimize variables
                     assignment. This article focuses on one of the basic
                     offset-assignment problems; the Simple Offset
                     Assignment (SOA) problem, where the AGU has only one
                     Address Register and no Modify Registers. The notion of
                     Tie-Break Function, TBF, introduced by Leupers and
                     Marwedel [1996], has been used to guide the placement
                     of variables in memory. In this article, we introduce a
                     more effective form of the TBF; the Effective
                     Tie-Breaking Function, ETBF, and show that the ETBF is
                     better at guiding the variables placement process.
                     Underpinning ETBF is the fact that program variables
                     are placed in memory in sequence, with each variable
                     having only two neighbors. We applied our technique to
                     randomly generated graphs as well as to real-world code
                     from the OffsetStone testbench [2010]. In previous work
                     [Ali et al. 2008], our technique showed up to 7\%
                     reduction in overhead when applied to
                     randomly-generated problem instances. We report in this
                     article on a further experiment of our technique on
                     real-code from the OffsetStone testbench. Despite the
                     substantial improvement our technique has achieved when
                     applied to random problem instances, we found that it
                     shows slight overhead reduction when applied to
                     real-world instances in OffsetStone, which agrees with
                     similar existing experiments. We analyze these results
                     and show that the ETBF defaults to TBF.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {63},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Girodias:2012:IMO,
  author          = {Bruno Girodias and Luiza Gheorghe Iugan and Youcef
                     Bouchebaba and Gabriela Nicolescu and El Mostapha
                     Abouhamid and Michel Langevin and Pierre Paulin},
  title           = {Integrating Memory Optimization with Mapping
                     Algorithms for Multi-Processors System-on-Chip},
  journal         = j-TECS,
  volume          = {11},
  number          = {3},
  pages           = {64:1--64:??},
  month           = sep,
  year            = {2012},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2345770.2345776},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Due to their great ability to parallelize at a very
                     high integration level, Multi-Processors
                     Systems-on-Chip (MPSoCs) are good candidates for
                     systems and applications such as multimedia. Memory is
                     becoming a key player for significant improvements in
                     these applications (power, performance and area). The
                     large amount of data manipulated by these applications
                     requires high-capacity computing and memory. Lately,
                     new programming models have been introduced. This leads
                     to the need of new optimization and mapping techniques
                     suitable for embedded systems and their programming
                     models. This article presents novel approaches for
                     combining memory optimization with mapping of
                     data-driven applications while considering
                     anti-dependence conflicts. Two different approaches are
                     studied and integrated with existing mapping
                     algorithms. The first approach (based on heuristic
                     algorithms) keeps the graph transformation for memory
                     optimization stage from the mapping stage and enables
                     their combination in a design flow. The second approach
                     (based on evolutionary algorithms) combines these two
                     stages and integrates them in a unique stage. Some
                     significant improvements are obtained for memory gain,
                     communication load and physical links.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {64},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Zhong:2012:SNL,
  author          = {Ziguo Zhong and Tian He},
  title           = {Sensor Node Localization with Uncontrolled Events},
  journal         = j-TECS,
  volume          = {11},
  number          = {3},
  pages           = {65:1--65:??},
  month           = sep,
  year            = {2012},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2345770.2345777},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Event-driven localization has been proposed as a
                     low-cost solution for node positioning in wireless
                     sensor networks. In order to eliminate the costly
                     requirement for accurate event control in existing
                     methods, we present a practical design using
                     uncontrolled events. The main idea is to estimate both
                     event generation parameters and the location of sensor
                     nodes simultaneously, by processing node sequences that
                     can be easily obtained from event detections. Besides
                     the basic design, we proposed two enhancements to
                     further extract information embedded in node orderings
                     for two scenarios: (i) node density is high; and (ii)
                     abundant events are available. To demonstrate the
                     generality of our design, both straight-line scan and
                     circular wave propagation events are addressed in the
                     article, and we evaluated the design with extensive
                     simulation as well as a testbed implementation with 41
                     MICAz motes. Results show that with only randomly
                     generated events, our design can effectively localize
                     nodes with great flexibility while adding little extra
                     cost at the resource constrained sensor node side. In
                     addition, localization via uncontrolled events provides
                     a potential option of achieving node positioning
                     through long-term ambient events.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {65},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Kumar:2012:ECI,
  author          = {Karthik Kumar and Yamini Nimmagadda and Yung-Hsiang
                     Lu},
  title           = {Energy Conservation for Image Retrieval on Mobile
                     Systems},
  journal         = j-TECS,
  volume          = {11},
  number          = {3},
  pages           = {66:1--66:??},
  month           = sep,
  year            = {2012},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2345770.2345779},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Mobile systems such as PDAs and cell phones play an
                     increasing role in handling visual contents such as
                     images. Thousands of images can be stored in a mobile
                     system with the advances in storage technology: this
                     creates the need for better organization and retrieval
                     of these images. Content Based Image Retrieval (CBIR)
                     is a method to retrieve images based on their visual
                     contents. In CBIR, images are compared by matching
                     their numerical representations called features; CBIR
                     is computation and memory intensive and consumes
                     significant amounts of energy. This article examines
                     energy conservation for CBIR on mobile systems. We
                     present three improvements to save energy while
                     performing the computation on the mobile system:
                     selective loading, adaptive loading, and caching
                     features in memory. Using these improvements adaptively
                     reduces the features to be loaded into memory for each
                     search. The reduction is achieved by estimating the
                     difficulty of the search. If the images in the
                     collection are dissimilar, fewer features are
                     sufficient; less computation is performed and energy
                     can be saved. We also consider the effect of
                     consecutive user queries and show how features can be
                     cached in memory to save energy. We implement a CBIR
                     algorithm on an HP iPAQ hw6945 and show that these
                     improvements can save energy and allow CBIR to scale up
                     to 50,000 images on a mobile system. We further
                     investigate if energy can be saved by migrating parts
                     of the computation to a server, called computation
                     offloading. We analyze the impact of the wireless
                     bandwidth, server speed, number of indexed images, and
                     the number of image queries on the energy consumption.
                     Using our scheme, CBIR can be made energy efficient
                     under all conditions.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {66},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Lee:2012:IMR,
  author          = {Jaehwan John Lee and Xiang Xiao},
  title           = {Instant Multiunit Resource Hardware Deadlock Detection
                     Scheme for System-on-Chips},
  journal         = j-TECS,
  volume          = {11},
  number          = {3},
  pages           = {67:1--67:??},
  month           = sep,
  year            = {2012},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2345770.2345780},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, a brand new method of determining
                     deadlock is presented. Most previous deadlock detection
                     methods are algorithmic in the sense that they usually
                     leverage some forms of Resource Allocation Graph (RAG)
                     representations and then algorithms are devised to
                     manipulate such representations in order to detect
                     deadlock using information contained in the graph.
                     Different from all previous methods, the proposed
                     method actualizes the RAG with a digital circuit and
                     uses it as a token-transmitting network. By supplying
                     special input signals (tokens) to the network and
                     observing the output tokens from the network, it is
                     easier to identify which process nodes are reachable
                     from each resource node in the graph. Using the
                     reachability information, deadlock can be detected
                     immediately. The time required to obtain the
                     reachability information is determined by how fast the
                     combinational circuit operates. Compared with previous
                     algorithmic methods, the proposed deadlock detection
                     can be deemed instant. We show that the proposed method
                     is an order of magnitude faster than the previous
                     fastest hardware mechanism and several orders of
                     magnitude faster than traditional software-based
                     algorithms.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {67},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Zappi:2012:NLP,
  author =       "Piero Zappi and Daniel Roggen and Elisabetta Farella
                 and Gerhard Tr{\"o}ster and Luca Benini",
  title =        "Network-Level Power-Performance Trade-Off in Wearable
                 Activity Recognition: a Dynamic Sensor Selection
                 Approach",
  journal =      j-TECS,
  volume =       "11",
  number =       "3",
  pages =        "68:1--68:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2345770.2345781",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 22 10:44:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Wearable gesture recognition enables context aware
                 applications and unobtrusive HCI. It is realized by
                 applying machine learning techniques to data from
                 on-body sensor nodes. We present a gesture recognition
                 system minimizing power while maintaining a run-time
                 application defined performance target through dynamic
                 sensor selection. Compared to the non managed approach
                 optimized for recognition accuracy (95\% accuracy), our
                 technique can extend network lifetime by 4 times with
                 accuracy {$>$90}\% and by 9 times with accuracy
                 {$>$70}\%. We characterize the approach and outline its
                 applicability to other scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ambrose:2012:RII,
  author =       "Jude A. Ambrose and Roshan G. Ragel and Sri
                 Parameswaran",
  title =        "Randomized Instruction Injection to Counter Power
                 Analysis Attacks",
  journal =      j-TECS,
  volume =       "11",
  number =       "3",
  pages =        "69:1--69:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2345770.2345782",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 22 10:44:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Side-channel attacks in general and power analysis
                 attacks in particular are becoming a major security
                 concern in embedded systems. Countermeasures proposed
                 against power analysis attacks are data and table
                 masking, current flattening, dummy instruction
                 insertion and bit-flips balancing. All these techniques
                 are either susceptible to multi-order power analysis
                 attack, not sufficiently generic to cover all
                 encryption algorithms, or burden the system with high
                 area, run-time or energy cost. In this article, we
                 propose a randomized instruction injection technique
                 (RIJID) that overcomes the pitfalls of previous
                 countermeasures. RIJID scrambles the power profile of a
                 cryptographic application by injecting random
                 instructions at random points of execution and
                 therefore protects the system against power analysis
                 attacks. Two different ways of triggering the
                 instruction injection are also presented: (1)
                 softRIJID, a hardware/software approach, where special
                 instructions are used in the code for triggering the
                 injection at runtime; and (2) autoRIJID, a hardware
                 approach, where the code injection is triggered by the
                 processor itself via detecting signatures of encryption
                 routines at runtime. A novel signature detection
                 technique is also introduced for identifying encryption
                 routines within application programs at runtime.
                 Further, a simple obfuscation metric (RIJIDindex) based
                 on cross-correlation that measures the scrambling
                 provided by any code injection technique is introduced,
                 which coarsely indicates the level of scrambling
                 achieved. Our processor models cost 1.9\% additional
                 area in the hardware/software approach and 1.2\% in the
                 hardware approach for a RISC based processor, and costs
                 on average 29.8\% in runtime and 27.1\% in energy for
                 the former and 25.0\% in runtime and 28.5\% in energy
                 for the latter, for industry standard cryptographic
                 applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pimentel:2012:ISS,
  author =       "Andy D. Pimentel and Naehyuck Chang and Mladen
                 Berekovic",
  title =        "Introduction to Special Section {ESTIMedia'09}",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "70:1--70:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362336.2362337",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Paterna:2012:VTW,
  author =       "Francesco Paterna and Andrea Acquaviva and Francesco
                 Papariello and Giuseppe Desoli and Luca Benini",
  title =        "Variability-Tolerant Workload Allocation for {MPSoC}
                 Energy Minimization under Real-Time Constraints",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "71:1--71:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362336.2362338",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Sub-50nm CMOS technologies are affected by significant
                 variability, which causes power and performance
                 variations among nominally similar cores in MPSoC
                 platforms. This undesired heterogeneity threatens
                 execution predictability and energy efficiency. We
                 propose two techniques to allocate sets of
                 barrier-synchronized tasks. The first technique models
                 allocation as an ILP and achieves optimal results, but
                 requires an offline solver. The second technique adopts
                 a two-stage heuristic approach, and it can be adapted
                 to work online. We tested our approach on the virtual
                 prototype of a next-generation industrial multicore
                 platform. Experimental results demonstrate that our
                 approach minimizes deadline violations while increasing
                 energy efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tsutsui:2012:HTP,
  author =       "Hiroshi Tsutsui and Koichi Hattori and Hiroyuki Ochi
                 and Yukihiro Nakamura",
  title =        "A High-Throughput Pipelined Parallel Architecture for
                 {JPEG XR} Encoding",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "72:1--72:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362336.2362339",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "JPEG XR is an emerging image coding standard, based on
                 HD Photo developed by Microsoft Corporation. It
                 supports high compression performance twice as high as
                 the de facto image coding system, namely, JPEG, and
                 also has an advantage over JPEG 2000 in terms of
                 computational cost. JPEG XR is expected to be
                 widespread for many devices including embedded systems
                 in the near future. In this article, we propose a novel
                 architecture for JPEG XR encoding. In previous
                 architectures, entropy coding was the throughput
                 bottleneck because it was implemented as a sequential
                 algorithm to handle data with dependency. We found that
                 there is no dependency in intra-macroblock data, and we
                 could safely pipeline all the encoding processes
                 including the entropy coding. In addition, each module
                 of our architecture, which can be regarded as a
                 pipeline stage, can be parallelized. As a result, our
                 architecture can achieve 12.8 pixel/cycle at its
                 maximum. To demonstrate our architecture, we designed
                 three versions of our architecture with different
                 degrees of parallelism of one, two, and four. Our
                 four-way parallel architecture achieves 579 Mpixel/sec
                 at 181MHz clock frequency for grayscale images.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2012:XFM,
  author          = {Minyoung Kim and Mark-Oliver Stehr and Carolyn Talcott
                     and Nikil Dutt and Nalini Venkatasubramanian},
  title           = {{xTune}: a formal methodology for cross-layer tuning
                     of mobile embedded systems},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {73:1--73:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362340},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Resource-limited mobile embedded systems can benefit
                     greatly from dynamic adaptation of system parameters.
                     We propose a novel approach that employs iterative
                     tuning using lightweight formal verification at runtime
                     with feedback for dynamic adaptation. One objective of
                     this approach is to enable trade-off analysis across
                     multiple layers (e.g., application, middleware, OS) and
                     predict the possible property violations as the system
                     evolves dynamically over time. Specifically, an
                     executable formal specification is developed for each
                     layer of the mobile system under consideration. The
                     formal specification is then analyzed using statistical
                     property checking and statistical quantitative
                     analysis, to determine the impact of various resource
                     management policies for achieving desired timing/QoS
                     properties. Integration of formal analysis with dynamic
                     behavior from system execution results in a feedback
                     loop that enables model refinement and further
                     optimization of policies and parameters. We demonstrate
                     the applicability of this approach to the adaptive
                     provisioning of resource-limited distributed real-time
                     systems using a mobile multimedia case study.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {73},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Dick:2012:ISS,
  author          = {Robert Dick and Li Shang and Nikil Dutt},
  title           = {Introduction to special section {SCPS'09}},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {74:1--74:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362341},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {74},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Koutsoukos:2012:PAM,
  author          = {Xenofon Koutsoukos and Nicholas Kottenstette and
                     Joseph Hall and Emeka Eyisi and Heath Leblanc and
                     Joseph Porter and Janos Sztipanovits},
  title           = {A passivity approach for model-based compositional
                     design of networked control systems},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {75:1--75:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362342},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The integration of physical systems through computing
                     and networking has become pervasive, a trend now known
                     as cyber-physical systems (CPS). Functionality in CPS
                     emerges from the interaction of networked computational
                     and physical objects. System design and integration are
                     particularly challenging because fundamentally
                     different physical and computational design concerns
                     intersect. The impact of these interactions is the loss
                     of compositionality which creates tremendous
                     challenges. The key idea in this article is to use
                     passivity for decoupling the control design of
                     networked systems from uncertainties such as time
                     delays and packet loss, thus providing a fundamental
                     simplification strategy that limits the complexity of
                     interactions. The main contribution is the application
                     of the approach to an experimental case study of a
                     networked multi-robot system. We present a networked
                     control architecture that ensures the overall system
                     remains stable in spite of implementation uncertainties
                     such as network delays and data dropouts, focusing on
                     the technical details required for the implementation.
                     We describe a prototype domain-specific modeling
                     language and automated code generation tools for the
                     design of networked control systems on top of passivity
                     that facilitate effective system configuration,
                     deployment, and testing. Finally, we present
                     experimental evaluation results that show decoupling of
                     interlayer interactions.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {75},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Shin:2012:CTC,
  author          = {Donghwa Shin and Jaehyun Park and Younghyun Kim and
                     Jaeam Seo and Naehyuck Chang},
  title           = {Control-theoretic cyber-physical system modeling and
                     synthesis: a case study of an active direct methanol
                     fuel cell},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {76:1--76:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362343},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {A joint optimization of the physical system and the
                     cyber world is one of the key problems in the design of
                     a cyber-physical system (CPS). The major mechanical
                     forces and/or chemical reactions in a plant are
                     commonly modified by actuators in the balance-of-plant
                     (BOP) system. More powerful actuators requires more
                     power, but generally increase the response of the
                     physical system powered by the electrical energy
                     generated by the physical system. To maximize the
                     overall output of a power generating plant therefore
                     requires joint optimization of the physical system and
                     the cyber world, and this is a key factor in the design
                     of a CPS. We introduce a systematic approach to the
                     modeling and synthesis of a CPS that emphasize joint
                     power optimization, using an active direct methanol
                     fuel cell (DMFC) as a case study. Active DMFC systems
                     are superior to passive DMFCs in terms of fuel
                     efficiency thanks to their BOP system, which includes
                     pumps, air blowers, and fans. However, designing a
                     small-scale active DMFC with the best overall system
                     efficiency requires the BOP system to be jointly
                     optimized with the DMFC stack operation, because the
                     BOP components are powered by the stack. Our approach
                     to this synthesis problem involves (i) BOP system
                     characterization, (ii) integrated DMFC system modeling,
                     (iii) configuring a system for the maximum net power
                     output through design space exploration, (iv) synthesis
                     of feedback control tasks, and (v) implementation.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {76},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Malik:2012:SLA,
  author          = {Avinash Malik and Zoran Salcic and Christopher Chong
                     and Salman Javed},
  title           = {System-level approach to the design of a smart
                     distributed surveillance system using {SystemJ}},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {77:1--77:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362344},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Distributed surveillance systems represent a class of
                     sensor networks used for object location and tracking,
                     road traffic monitoring, security, and other purposes.
                     They are very complex to describe, design, and run.
                     Because of their sensitivity, they need to be carefully
                     designed and validated. We present a system-level
                     approach to modeling and designing such systems using a
                     new system-level programming language, SystemJ, which
                     enables designers to describe computational and
                     communication parts of such applications in a highly
                     abstract manner. The designed system can be modeled and
                     validated even before deployment and in that way
                     contribute to the overall reliability and
                     trustworthiness of such systems. As an additional tool,
                     the design environment for specification of the
                     surveillance system topology, physical and
                     communication properties, selected sensors and their
                     interconnectivity with the computing resources was
                     developed. This tool enables easy composition of
                     multiple sensors and their respective controllers,
                     capturing changes of configuration of the system and
                     underlying communication, and automatic generation of
                     the formal description of the surveillance system. This
                     description is then used for the generation of
                     executable code and/or the templates for detailed
                     SystemJ application-specific code, as well as for
                     generation of the operator GUI in a surveillance
                     system.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {77},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Yoong:2012:ICC,
  author          = {Li Hsien Yoong and Partha S. Roop and Zoran Salcic},
  title           = {Implementing constrained cyber-physical systems with
                     {IEC 61499}},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {78:1--78:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362345},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Cyber-physical systems (CPS) are integrations of
                     computation and control with sensing and actuation of
                     the physical environment. Typically, such systems
                     consist of embedded computers that monitor and control
                     physical processes in a feedback loop. While modern
                     electronic systems are increasingly characterized as
                     CPS, their design and synthesis still rely on
                     traditional methods, which lack systematic and
                     automated techniques for accomplishment. Recently, IEC
                     61499 has been proposed as a standard for designing
                     industrial process-control and measurement systems. It
                     prescribes a component-based approach for developing
                     industrial automation software using function blocks.
                     Executable code can then be automatically generated and
                     simulated from these function blocks. This bodes well
                     for designers of CPS, who are more likely to be experts
                     in specific industrial domains, rather than in computer
                     science. The intuitive graphical nature and automatic
                     code synthesis of IEC 61499 programs will alleviate the
                     programming burden of industrial engineers, while
                     ensuring more reliable software. While software
                     synthesis from IEC 61499 programs is not new, the
                     generation of efficient code from them has been
                     wanting. This has made it difficult for function blocks
                     to be used in software development for
                     resource-constrained embedded controllers commonly
                     employed in CPS. To address this, we present an
                     approach that can generate very efficient code from
                     function block descriptions. Experimental results from
                     a benchmark suite shows that our approach produces
                     substantially faster and smaller code compared to
                     existing techniques.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {78},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Subramanian:2012:GOP,
  author          = {Varun Subramanian and Michael Gilberti and Alex Doboli
                     and Dan Pescaru},
  title           = {A goal-oriented programming framework for grid sensor
                     networks with reconfigurable embedded nodes},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {79:1--79:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362346},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Cyber-physical systems (CPS) are large, distributed
                     embedded systems integrated with various sensors and
                     actuators. CPS are rapidly emerging as an important
                     computing paradigm in many modern applications.
                     Developing CPS applications is currently challenging
                     due to the sheer complexity of the related
                     functionality as well as the broad set of constraints
                     and unknowns that must be tackled during operation.
                     This article presents a novel high-level programming
                     model and the supporting optimization and middleware
                     routines for executing applications on
                     physically-distributed networks of reconfigurable
                     embedded systems. The proposed model describes the
                     optimization goals, sensing inputs, actuation outputs,
                     events, and constraints of an application, while
                     leaving to the compiler and execution environment the
                     task of optimally implementing the derived
                     functionality. Experimental results discuss the
                     additional performance optimizations enabled by the
                     proposed model, and the timing and power consumption of
                     the middleware routines, and present a temperature
                     monitoring application implemented on a network of
                     reconfigurable, embedded processors.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {79},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Tan:2012:ACF,
  author          = {Rui Tan and Guoliang Xing and Xue Liu and Jianguo Yao
                     and Zhaohui Yuan},
  title           = {Adaptive calibration for fusion-based cyber-physical
                     systems},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {80:1--80:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362347},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Many Cyber-Physical Systems (CPS) are composed of
                     low-cost devices that are deeply integrated with
                     physical environments. As a result, the performance of
                     a CPS system is inevitably undermined by various
                     physical uncertainties, which include stochastic
                     noises, hardware biases, unpredictable environment
                     changes, and dynamics of the physical process of
                     interest. Traditional solutions to these issues (e.g.,
                     device calibration and collaborative signal processing)
                     work in an open-loop fashion and hence often fail to
                     adapt to the uncertainties after system deployment. In
                     this article, we propose an adaptive system-level
                     calibration approach for a class of CPS systems whose
                     primary objective is to detect events or targets of
                     interest. Through collaborative data fusion, our
                     calibration approach features a feedback control loop
                     that exploits system heterogeneity to mitigate the
                     impact of aforementioned uncertainties on the system
                     performance. In contrast to existing heuristic-based
                     solutions, our control-theoretical calibration
                     algorithm can ensure provable system stability and
                     convergence. We also develop a routing algorithm for
                     fusion-based multihop CPS systems that is robust to
                     communication unreliability and delay. Our approach is
                     evaluated by both experiments on a testbed of Tmotes as
                     well as extensive simulations based on data traces
                     gathered from a real vehicle detection experiment. The
                     results demonstrate that our calibration algorithm
                     enables a CPS system to maintain the optimal sensing
                     performance in the presence of various system and
                     environmental dynamics.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {80},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Nam:2012:MTI,
  author          = {Min-Young Nam and Kyungtae Kang and Rodolfo Pellizzoni
                     and Kyung-Joon Park and Jung-Eun Kim and Lui Sha},
  title           = {Modeling towards incremental early analyzability of
                     networked avionics systems using virtual integration},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {81:1--81:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362348},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With the advance of hardware technology, more features
                     are incrementally added to already existing networked
                     systems. Avionics has a stronger tendency to use
                     preexisting applications due to its complexity and
                     scale. As resource sharing becomes intense among the
                     network and the computing modules, it has become a
                     difficult task for the system designer to make
                     confident architectural decisions even for incremental
                     changes. Providing a tailored environment to model and
                     analyze incremental changes requires a combination of
                     software tools and hardware support. We have built a
                     virtual integration tool called ASIIST which can
                     provide a worst-case end-to-end latency of data that is
                     sent through a network and the internal bus
                     architecture of the end-systems. Also, we have devised
                     a new real-time switching algorithm which guarantees
                     the worst-case network delay of preexisting network
                     traffic under feasible conditions. With the real-time
                     switch support, ASIIST can provide an early modularized
                     analysis of the end-to-end latency to make
                     architectural design choices and incremental changes
                     easier for the user.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {81},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Pajic:2012:RAE,
  author          = {Miroslav Pajic and Alexander Chernoguzov and Rahul
                     Mangharam},
  title           = {Robust architectures for embedded wireless network
                     control and actuation},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {82:1--82:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362349},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Networked cyber-physical systems are fundamentally
                     constrained by the tight coupling and closed-loop
                     control of physical processes. To address actuation in
                     such closed-loop wireless control systems there is a
                     strong need to rethink the communication architectures
                     and protocols for reliability, coordination, and
                     control. We introduce the Embedded Virtual Machine
                     (EVM), a programming abstraction where controller tasks
                     with their control and timing properties are maintained
                     across physical node boundaries and functionality is
                     capable of migrating to the most competent set of
                     physical controllers. In the context of process and
                     discrete control, an EVM is the distributed runtime
                     system that dynamically selects primary-backup sets of
                     controllers given spatial and temporal constraints of
                     the underlying wireless network. EVM-based algorithms
                     allow network control algorithms to operate seamlessly
                     over less reliable wireless networks with topological
                     changes. They introduce new capabilities such as
                     predictable outcomes during sensor/actuator failure,
                     adaptation to mode changes, and runtime optimization of
                     resource consumption. An automated design flow from
                     Simulink to platform-independent domain-specific
                     languages, and subsequently, to platform-dependent code
                     generation is presented. Through case studies in
                     discrete and process control we demonstrate the
                     capabilities of EVM-based wireless network control
                     systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {82},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Lakshmanan:2012:OPM,
  author          = {Karthik Lakshmanan and Dionisio {De Niz} and
                     Ragunathan (Raj) Rajkumar and Gabriel Moreno},
  title           = {Overload provisioning in mixed-criticality
                     cyber-physical systems},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {83:1--83:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362350},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Cyber-physical systems are an emerging class of
                     applications that require tightly coupled interaction
                     between the computational and physical worlds. These
                     systems are typically realized using sensor/actuator
                     interfaces connected with processing backbones. Safety
                     is a primary concern in cyber-physical systems since
                     the actuators directly influence the physical world.
                     However, unexpected or unusual conditions in the
                     physical world can manifest themselves as increased
                     workload demands being offered to the computational
                     infrastructure of a cyber-physical system. Guaranteeing
                     system safety under overload conditions is therefore a
                     prime concern in developing and deploying
                     cyber-physical systems. In this work, we study this
                     problem in the context of a radar surveillance system,
                     where tasks have different levels of criticality or
                     influence on system safety. In the face of overloads,
                     we observe that the desirable property in such systems
                     is that the more critical tasks continue to meet their
                     timing requirements. We capture this mixed-criticality
                     overload requirement using a formal overload-tolerance
                     metric called ductility. Using this overload-tolerance
                     metric, we first develop our solution in the context of
                     uniprocessor systems, where we show that Zero-Slack
                     scheduling (ZS) algorithms can be used to improve the
                     overload behavior in mixed-criticality cyber-physical
                     systems compared to existing fixed-priority scheduling
                     algorithms like Rate-Monotonic Scheduling (RMS) and
                     Criticality-As-Priority-Assignment (CAPA). Leveraging
                     these results, we then develop a criticality-aware task
                     allocation algorithm called Compress-on-Overload
                     Packing (COP) for dealing with multiprocessor
                     cyber-physical systems. Evaluation results show that
                     COP achieves up to five times better ductility than
                     traditional load balancing bin-packing algorithms like
                     Worst-Fit Decreasing (WFD). Finally, we apply ZS and
                     COP to the radar surveillance system to demonstrate the
                     resulting improvement in system overload behavior. Our
                     implementation of the Zero-Slack scheduler is available
                     as a part of the Linux/RK project, which provides
                     resource kernel extensions for Linux.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {83},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Woehrle:2012:CTC,
  author          = {Matthias Woehrle and Kai Lampka and Lothar Thiele},
  title           = {Conformance testing for cyber-physical systems},
  journal         = j-TECS,
  volume          = {11},
  number          = {4},
  pages           = {84:1--84:??},
  month           = dec,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2362336.2362351},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jan 10 17:38:16 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Cyber-Physical Systems (CPS) require a high degree of
                     reliability and robustness. Hence it is important to
                     assert their correctness with respect to
                     extra-functional properties, like power consumption,
                     temperature, etc. In turn the physical quantities may
                     be exploited for assessing system implementations. This
                     article develops a methodology for utilizing
                     measurements of physical quantities for testing the
                     conformance of a running CPS with respect to a formal
                     description of its required behavior allowing to
                     uncover defects. We present foundations and
                     implementations of this approach and demonstrate its
                     usefulness by conformance testing power measurements of
                     a wireless sensor node with a formal model of its power
                     consumption.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {84},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Zhu:2012:OTA,
  author =       "Qi Zhu and Haibo Zeng and Wei Zheng and Marco {Di
                 Natale} and Alberto Sangiovanni-Vincentelli",
  title =        "Optimization of task allocation and priority
                 assignment in hard real-time distributed systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "85:1--85:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362336.2362352",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The complexity and physical distribution of modern
                 active safety, chassis, and powertrain automotive
                 applications requires the use of distributed
                 architectures. Complex functions designed as networks
                 of function blocks exchanging signal information are
                 deployed onto the physical HW and implemented in a SW
                 architecture consisting of a set of tasks and messages.
                 The typical configuration features priority-based
                 scheduling of tasks and messages and imposes end-to-end
                 deadlines. In this work, we present and compare
                 formulations and procedures for the optimization of the
                 task allocation, the signal to message mapping, and the
                 assignment of priorities to tasks and messages in order
                 to meet end-to-end deadline constraints and minimize
                 latencies. Our formulations leverage worst-case
                 response time analysis within a mixed integer linear
                 optimization framework and are compared for performance
                 against a simulated annealing implementation. The
                 methods are applied for evaluation to an automotive
                 case study of complexity comparable to industrial
                 design problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "85",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cucinotta:2012:ART,
  author =       "Tommaso Cucinotta and Fabio Checconi and Luca Abeni
                 and Luigi Palopoli",
  title =        "Adaptive real-time scheduling for legacy multimedia
                 applications",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "86:1--86:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362336.2362353",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multimedia applications are often executed on standard
                 personal computers. The absence of established
                 standards has hindered the adoption of real-time
                 scheduling solutions in this class of applications.
                 Developers have adopted a wide range of heuristic
                 approaches to achieve an acceptable timing behavior but
                 the result is often unreliable. We propose a mechanism
                 to extend the benefits of real-time scheduling to
                 legacy applications based on the combination of two
                 techniques: (1) a real-time monitor that observes and
                 infers the activation period of the application, and
                 (2) a feedback mechanism that adapts the scheduling
                 parameters to improve its real-time performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "86",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Scharfenberger:2012:RIP,
  author =       "Christian Scharfenberger and Samarjit Chakraborty
                 and Georg F{\"a}rber",
  title =        "Robust image processing for an omnidirectional
                 camera-based smart car door",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "87:1--87:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362336.2362354",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Over the last decade, there has been an increasing
                 emphasis on driver-assistance systems for the
                 automotive domain. In this article, we report our work
                 on designing a camera-based surveillance system
                 embedded in a ``smart'' car door. Such a camera is used
                 to monitor the ambient environment outside the car, for
                 instance, the presence of obstacles such as approaching
                 cars or cyclists who might collide with the car door if
                 opened---and automatically control the car door
                 operations. This is an enhancement to the currently
                 available side-view mirrors that the driver/passenger
                 checks before opening the car door. The focus of this
                 article is on fast and robust image processing
                 algorithms specifically targeting such a smart car door
                 system. The requirement is to quickly detect traffic
                 objects of interest from grayscale images captured by
                 omnidirectional cameras. While known algorithms for
                 object extraction from the image processing literature
                 rely on color information and are sensitive to shadows
                 and illumination changes, our proposed algorithms are
                 highly robust, can operate on grayscale images (color
                 images are not available in our setup), and output
                 results in real time. We present a number of
                 experimental results based on image sequences captured
                 from real-life traffic scenarios to demonstrate the
                 applicability of our algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "87",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gordon-Ross:2012:CCR,
  author =       "Ann Gordon-Ross and Frank Vahid and Nikil Dutt",
  title =        "Combining code reordering and cache configuration",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "88:1--88:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362336.2399177",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The instruction cache is a popular optimization target
                 due to the cache's high impact on system performance
                 and power and because of the cache's predictable
                 temporal and spatial locality. This article is an
                 in-depth study on the interaction of code reordering (a
                 long-known technique) and cache configuration (a
                 relatively new technique). Experimental results show
                 that code reordering coupled with cache configuration
                 reveals additional energy savings as high as 10--15\%
                 for several benchmarks with reduced cache area as high
                 as 48\%. To exploit these additional benefits, we
                 architect and evaluate several design exploration
                 heuristics for combining these two methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "88",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Baiocchi:2012:EDB,
  author =       "Jos{\'e} A. Baiocchi and Bruce R. Childers and Jack W.
                 Davidson and Jason D. Hiser",
  title =        "Enabling dynamic binary translation in embedded
                 systems with scratchpad memory",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "89:1--89:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362336.2399178",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Important challenges for embedded systems can be
                 addressed by dynamic binary translation. A dynamic
                 binary translator stores translated instructions in a
                 software-managed code cache, which is usually large to
                 minimize overhead. This article shows how to use a
                 small scratchpad memory for the code cache. A small
                 code cache may require frequent code evictions and
                 retranslation, which degrade performance. We propose
                 techniques to reduce the number of instructions
                 inserted by the translator and a way to form fragments
                 that minimizes translated code size. With our
                 techniques, a much smaller code cache can hold a
                 program's translated code working set.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "89",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Khalgui:2013:ISI,
  author =       "Mohamed Khalgui and Zhiwu Li",
  title =        "Introduction to the {Special Issue on Modeling and
                 Verification of Discrete Event Systems}",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406337",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2013:DLE,
  author =       "Shouguang Wang and Chengying Wang and Yanping Yu",
  title =        "Design of Liveness-Enforcing Supervisors for {S3PR}
                 Based on Complementary Places",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406338",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, an algorithm is proposed to design
                 liveness-enforcing supervisors for systems of simple
                 sequential processes with resources (S$^3$PR) based on
                 complementary places. Firstly, a mixed integer
                 programming (MIP) based deadlock detection method is
                 used to find unmarked strict minimal siphons from an
                 infinite-capacity net. Next, the finite-capacity net,
                 in which liveness can be enforced, is obtained by
                 adding capacity function to the infinite-capacity net.
                 Finally, complementary-place transformation is used to
                 transform the finite-capacity net into an
                 infinite-capacity net. This article focuses on adding a
                 complementary place to each operation place that is
                 related to unmarked siphons, deals with the deadlock
                 problem from a new view point, and hence advances the
                 deadlock control theory. Compared with the existing
                 methods, the new policy is easier to implement for real
                 industrial systems. More importantly, design of a
                 complementary-place supervisor is very easy. Finally,
                 in some cases, the new policy can obtain a structurally
                 simpler supervisor with more permissive behavior than
                 the existing methods do. A flexible manufacturing
                 systems (FMS) example is used to compare the proposed
                 policy with some other methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2013:CMS,
  author =       "Yufeng Chen and Gaiyun Liu",
  title =        "Computation of Minimal Siphons in {Petri} Nets by
                 Using Binary Decision Diagrams",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406339",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Siphons play an important role in the development of
                 deadlock control methods by using Petri nets. The
                 number of siphons increases exponentially with respect
                 to the size of a Petri net. This article presents a
                 symbolic approach to the computation of minimal siphons
                 in Petri nets by using binary decision diagrams (BDD).
                 The siphons of a Petri net can be found via a set of
                 logic conditions. The logic conditions are symbolically
                 modeled by using Boolean algebras. The operations of
                 Boolean algebras are implemented by BDD that are
                 capable of representing large sets of siphons with
                 small shared data structures. The proposed method first
                 uses BDD to compute all siphons of a Petri net and then
                 a binary relation is designed to extract all minimal
                 siphons. Finally, by using a number of examples, the
                 efficiency of the proposed method is verified through
                 different-sized problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ding:2013:DAV,
  author =       "Zhijun Ding and Changjun Jiang and Mengchu Zhou",
  title =        "Design, Analysis and Verification of Real-Time Systems
                 Based on Time {Petri} Net Refinement",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406340",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A type of refinement operations of time Petri nets is
                 presented for design, analysis and verification of
                 complex real-time systems. First, the behavior
                 preservation is studied under time constraints in a
                 refinement operation, and a sufficient condition for
                 behavior preservation is obtained. Then, the property
                 preservation is considered, and the results indicate
                 that if the refinement operation of time Petri nets
                 satisfies behavior preservation, it can also preserve
                 properties such as boundedness and liveness. Finally,
                 based on the behavior preservation, a reachability
                 decidability algorithm of a refined time Petri net is
                 designed using the reachability trees of its original
                 net and subnet. The research results are illustrated by
                 an example of designing, analyzing and verifying a
                 real-time manufacturing system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{McInnes:2013:MAT,
  author =       "Allan I. McInnes",
  title =        "Modeling and Analysis of {TinyOS} Sensor Node
                 Firmware: a {CSP} Approach",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406341",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Wireless sensor networks are an increasingly popular
                 application area for embedded systems. Individual
                 sensor nodes within a network are typically
                 resource-constrained, event-driven, and require a high
                 degree of concurrency. This combination of requirements
                 motivated the development of the widely used TinyOS
                 sensor node operating system. The TinyOS concurrency
                 model is a lightweight nonpreemptive system designed to
                 suit the needs of typical sensor network applications.
                 Although the TinyOS concurrency model is easier to
                 reason about than preemptive threads, it can still give
                 rise to undesirable behavior due to unexpected
                 interleavings of related tasks, or unanticipated
                 preemption by interrupt handlers. To aid TinyOS
                 developers in understanding the behavior of their
                 programs we have developed a technique for using the
                 process algebra Communicating Sequential Processes
                 (CSP) to model the interactions between TinyOS
                 components, and between an application and the TinyOS
                 scheduling and preemption mechanisms. Analysis of the
                 resulting models can help TinyOS developers to discover
                 and diagnose concurrency-related errors in their
                 designs that might otherwise go undetected until after
                 the application has been widely deployed. Such analysis
                 is particularly valuable for the TinyOS components that
                 are used as building blocks for a large number of other
                 applications, since a subtle or sporadic error in a
                 widely deployed building block component could be
                 extremely costly to repair.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Godary-Dejean:2013:FVD,
  author =       "Karen Godary-Dejean and David Andreu",
  title =        "Formal Validation of a Deterministic {MAC} Protocol",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406342",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article deals with the formal validation of
                 STIMAP, a medium access protocol that has been designed
                 to meet the specific requirements of an implantable
                 network-based neuroprosthesis. This article presents
                 the modeling and the validation of its medium access,
                 using model checking on Time Petri Nets. Doing so, we
                 show that existent formal methods and tools are not
                 perfectly suitable for the validation of real systems,
                 especially when some hardware parameters have to be
                 considered. This article then presents how these
                 difficulties have been managed during the modeling and
                 verification phases, and gives the validation results
                 for STIMAP, providing constraints to respect.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Boucheneb:2013:RIS,
  author =       "Hanifa Boucheneb and Kamel Barkaoui",
  title =        "Reducing Interleaving Semantics Redundancy in
                 Reachability Analysis of Time {Petri} Nets",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406343",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The main problem of verification techniques based on
                 exploration of (reachable) state space is the state
                 explosion problem. In timed models, abstract states
                 reached by different interleavings of the same set of
                 transitions are, in general, different and their union
                 is not necessarily an abstract state. To attenuate this
                 state explosion, it would be interesting to reduce the
                 redundancy caused by the interleaving semantics by
                 agglomerating all these abstract states whenever their
                 union is an abstract state. This article considers the
                 time Petri net model and establishes some sufficient
                 conditions that ensure that this union is an abstract
                 state. In addition, it proposes a procedure to compute
                 this union without computing beforehand intermediate
                 abstract states. Finally, it shows how to use this
                 result to improve the reachability analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2013:SCE,
  author =       "Zhiming Zhang and Weimin Wu",
  title =        "Sequence Control of Essential Siphons for Deadlock
                 Prevention in {Petri} Nets",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406344",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Deadlock prevention is crucial to the modeling of
                 flexible manufacturing systems. In the Petri net
                 framework, deadlock prevention is often addressed by
                 siphon-based control (SC) policies. Recent research
                 results show that SC methods can avoid full siphon
                 enumeration by using mixed integer programming (MIP) to
                 greatly increase the computational efficiency so that
                 it can be applied in large systems in computable time.
                 Besides, maximally permissive control solutions can be
                 obtained by means of iterative siphon control (ISC)
                 approaches and MIP. Then the remaining problems are
                 redundancy and MIP iterations. Redundant controllers
                 make the closed-loop system more complicated and each
                 MIP iteration increases the total computational time.
                 This article proposes a revised ISC deadlock prevention
                 policy which can achieve better results than the other
                 reported methods in terms of redundancy and MIP
                 iterations while maintaining the maximal
                 permissiveness. Several benchmark examples are provided
                 to illustrate the proposed approach and to be compared
                 with the other reported methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ahmed:2013:HGA,
  author =       "Zakir Hussain Ahmed",
  title =        "A Hybrid Genetic Algorithm for the {Bottleneck
                 Traveling Salesman Problem}",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406345",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The bottleneck traveling salesman problem is to find a
                 Hamiltonian circuit that minimizes the largest cost of
                 any of its arcs in a graph. A simple genetic algorithm
                 (GA) using sequential constructive crossover has been
                 developed to obtain heuristic solution to the problem.
                 The hybrid GA incorporates 2-opt search, another
                 proposed local search and immigration to the simple GA
                 for obtaining better solution. The efficiency of our
                 hybrid GA to the problem against two existing heuristic
                 algorithms has been examined for some symmetric TSPLIB
                 instances. The comparative study shows the
                 effectiveness of our hybrid algorithm. Finally, we
                 present solutions to the problem for asymmetric TSPLIB
                 instances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2013:OSL,
  author =       "Naiqi Wu and Mengchu Zhou and Gang Hu",
  title =        "One-Step Look-Ahead Maximally Permissive Deadlock
                 Control of {AMS} by Using {Petri} Nets",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406346",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "It is desired that a deadlock control policy for
                 automated manufacturing systems (AMS) is maximally
                 permissive. However, its tractability issue remains
                 open, and this work addresses this important issue. It
                 models AMS with a resource-oriented Petri net (ROPN)
                 and presents a necessary and sufficient condition under
                 which there exists a one-step look-ahead maximally
                 permissive control policy for deadlock avoidance in
                 AMS. It further identifies some conditions under which
                 a one-step look-ahead maximally permissive deadlock
                 control policy exists for a single-capacity system. The
                 conditions can be conveniently examined by using the
                 developed ROPN model.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2013:TBD,
  author =       "Yi-Sheng Huang and Yen-Liang Pan and Pin-June Su",
  title =        "Transition-Based Deadlock Detection and Recovery
                 Policy for {FMSs} Using Graph Technique",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406347",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A transition-controlled deadlock detection and
                 recovery prevention policy is presented for a subclass
                 of Petri nets used to model flexible manufacturing
                 systems. The subclass is called systems of simple
                 sequential processes with resources (S$^3$PR). The
                 proposed policy is different from the standard deadlock
                 prevention policies. Instead of adding control places,
                 this policy adds a controlled transition to solve a
                 group of deadlocked markings that have the same
                 graph-based property. Finally, the results of our study
                 indicate that the proposed policy appears to be more
                 permissive than those existing ones that add control
                 places.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nazemzadeh:2013:FMD,
  author =       "Payam Nazemzadeh and Abbas Dideban and Meisam
                 Zareiee",
  title =        "Fault Modeling in Discrete Event Systems Using {Petri}
                 Nets",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406348",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article a model-based controller
                 reconfiguration method for fault-tolerant control of
                 discrete event systems has been introduced. In this
                 method, we model the fault conditions for each
                 specified fault as a new model called fault model. The
                 system then consists of three different models called
                 process, specification and fault. The faulty parts of
                 the system are not permitted to do any job and the
                 controller tries to enforce the specifications by other
                 parts of the system. With this method, the controller
                 reconfiguration problem for fault-tolerant control of
                 discrete event systems converts to the problem of
                 synchronizing the process, specification and fault
                 model. We must synthesize a supervisor that can enforce
                 both specifications and faults status. If this
                 supervisor can be determined, we can achieve a
                 fault-tolerant controller. Implementing both
                 specification and fault models in the system, may lead
                 to a large number of forbidden states and constraints
                 and so on a more complicated forbidden states problem
                 must be solved. The application of constraints
                 simplification methods is shown. By the existing
                 methods for offline simplifying of constraints, we can
                 arrive at a simplified fault tolerant controller.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mhamdi:2013:FMT,
  author =       "Tarek Mhamdi and Osman Hasan and Sofi{\`e}ne Tahar",
  title =        "Formalization of Measure Theory and {Lebesgue}
                 Integration for Probabilistic Analysis in {HOL}",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406349",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dynamic systems that exhibit probabilistic behavior
                 represent a large class of man-made systems such as
                 communication networks, air traffic control, and other
                 mission-critical systems. Evaluation of quantitative
                 issues like performance and dependability of these
                 systems is of paramount importance. In this paper, we
                 propose a generalized methodology to formally reason
                 about probabilistic systems within a theorem prover. We
                 present a formalization of measure theory in the HOL
                 theorem prover and use it to formalize basic concepts
                 from the theory of probability. We also use the
                 Lebesgue integration to formalize statistical
                 properties of random variables. To illustrate the
                 practical effectiveness of our methodology, we formally
                 prove classical results from the theories of
                 probability and information and use them in a data
                 compression application in HOL.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Khalgui:2013:RRE,
  author =       "Mohamed Khalgui and Olfa Mosbahi and Zhiwu Li",
  title =        "Runtime Reconfigurations of Embedded Controllers",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406350",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The article deals with Reconfigurable Embedded Control
                 Systems following different Component-based
                 Technologies and/or Architecture Description Languages
                 used today in Industry. We define a Control Component
                 as a software unit to support control tasks of the
                 system which is assumed to be a network of components
                 with precedence constraints. We define an agent-based
                 architecture to handle automatic reconfigurations under
                 well-defined conditions by creating, deleting or
                 updating components to bring the whole system into safe
                 and optimal behaviors. To cover all possible
                 reconfiguration forms, we model the agent by nested
                 state machines according to the formalism Net
                 Condition/Event Systems (abbr. NCES) which is an
                 extension of Petri nets. We apply in addition a model
                 checking to verify functional and extra-functional
                 properties according to the temporal logic
                 ``Computation Tree Logic'' (abbr. CTL). The goal is to
                 check the agent's reactivity after any evolution of the
                 environment. Several complex networks can implement the
                 system such that each one is executed at a given time
                 when a corresponding reconfiguration scenario is
                 automatically applied by the agent. To check the
                 correctness of each one of them, we apply in several
                 steps a refinement-based approach that automatically
                 specifies feasible Control Components according to
                 NCES. The model checker SESA is automatically applied
                 in each step to verify deadlock properties of new
                 generated components, and is manually used to verify
                 CTL-based properties according to user requirements.
                 Two Industrial Benchmark Production Systems FESTO and
                 EnAS available in our research laboratory are applied
                 to explain the article's contributions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mery:2013:FSM,
  author =       "Dominique M{\'e}ry and Neeraj Kumar Singh",
  title =        "Formal Specification of Medical Systems by Proof-Based
                 Refinement",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406351",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Formal methods have emerged as an alternative approach
                 to ensuring quality and correctness of highly critical
                 systems, overcoming limitations of traditional
                 validation techniques such as simulation and testing.
                 We propose a refinement-based methodology for complex
                 medical systems design, which possesses all the
                 required key features. A refinement-based combined
                 approach of formal verification, model validation using
                 a model-checker and refinement chart is proposed in
                 this methodology for designing a high-confidence
                 medical device. Furthermore, we show the effectiveness
                 of this methodology for the design of a cardiac
                 pacemaker system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mosbahi:2013:CFM,
  author =       "Olfa Mosbahi",
  title =        "Combining Formal Methods for the Development of
                 Reactive Systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406352",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article deals with the use of two verification
                 approaches: theorem proving and model checking. We
                 focus on the Event-B method by using its associated
                 theorem proving tool (Click\_n\_Prove), and on the
                 language TLA$^+$ by using its model checker TLC. By
                 considering the limitation of the Event-B method to
                 invariance properties, we propose to apply the language
                 TLA$^+$ to verify liveness properties on a software
                 behavior. We extend first the expressivity and the
                 semantics of a B model (called temporal B model) to
                 deal with the specification of fairness and eventuality
                 properties. Second, we give transformation rules from a
                 temporal B model into a TLA$^+$ module. We present in
                 particular, our prototype system called B2TLA$^+$, that
                 we have developed to support this transformation; then
                 we can verify these properties thanks to the model
                 checker TLC on finite state systems. For the
                 verification of infinite-state systems, we propose the
                 use of the predicate diagrams. We illustrate our
                 approach on a case study of a parcel sorting system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sunder:2013:FVD,
  author =       "Christoph S{\"u}nder and Valeriy Vyatkin and Alois
                 Zoitl",
  title =        "Formal Verification of Downtimeless System Evolution
                 in Embedded Automation Controllers",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406353",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a new formal approach to
                 validation of on-the-fly modification of control
                 software in automation systems. The concept of
                 downtimeless system evolution (DSE) is introduced. The
                 DSE is essentially based on the use of IEC 61499 system
                 architecture and formal modeling and verification of
                 the hardware and software of an automation device. The
                 validation is performed by means of two complementary
                 techniques: analytic calculations and formal
                 verification by model-checking.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Khalgui:2013:DRA,
  author =       "Mohamed Khalgui",
  title =        "Distributed Reconfigurations of Autonomous {IEC61499}
                 Systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2406336.2406354",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The article deals with Distributed Multiagent
                 Reconfigurable Embedded Control Systems following the
                 International Industrial Standard IEC61499 in which a
                 Function Block (Abbreviated by FB) is an
                 event-triggered software component owning data and a
                 control system is a network of distributed blocks. We
                 define a multiagent embedded architecture in which a
                 Reconfiguration Agent is affected to each device of the
                 execution environment to apply local reconfigurations,
                 and a Coordination Agent is proposed for coordination
                 between devices in order to guarantee safe and coherent
                 distributed reconfigurations. A Communication Protocol
                 is proposed to handle such coordination by using
                 well-defined Coordination Matrices. A prototype is
                 developed to simulate the whole architecture when
                 faults occur or system's optimizations are applied. We
                 specify Reconfiguration Agents to be modeled by nested
                 state machines, and the Coordination Agent according to
                 the formalism Net Condition/Event Systems (Abbreviated
                 by NCES) which is an extension of Petri nets. To allow
                 correct and coherent distributed reconfigurations, we
                 check all possible interactions between controllers by
                 verifying that whenever a reconfiguration is applied in
                 a device, the Coordination Agent and other concerned
                 devices react as described in user requirements. We
                 propose finally XML-based implementations of both
                 Coordination and Reconfiguration Agents according to
                 the technology IEC61499. The article's contributions
                 are applied to two Benchmark Production Systems
                 available in our research laboratory.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2013:ISS,
  author =       "Jian-Jia Chen and Maurizio Palesi",
  title =        "Introduction to the special section on
                 {ESTIMedia'12}",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "32:1--32:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435228",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nikitakis:2013:NLP,
  author =       "Antonis Nikitakis and Savvas Papaioannou and Ioannis
                 Papaefstathiou",
  title =        "A novel low-power embedded object recognition system
                 working at multi-frames per second",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435229",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "One very important challenge in the field of
                 multimedia is the implementation of fast and detailed
                 Object Detection and Recognition systems. In
                 particular, in the current state-of-the-art mobile
                 multimedia systems, it is highly desirable to detect
                 and locate certain objects within a video frame in real
                 time. Although a significant number of Object Detection
                 and Recognition schemes have been developed and
                 implemented, triggering very accurate results, the vast
                 majority of them cannot be applied in state-of-the-art
                 mobile multimedia devices; this is mainly due to the
                 fact that they are highly complex schemes that require
                 a significant amount of processing power, while they
                 are also time consuming and very power hungry. In this
                 article, we present a novel FPGA-based embedded
                 implementation of a very efficient object recognition
                 algorithm called Receptive Field Cooccurrence
                 Histograms Algorithm (RFCH). Our main focus was to
                 increase its performance so as to be able to handle the
                 object recognition task of today's highly sophisticated
                 embedded multimedia systems while keeping its energy
                 consumption at very low levels. Our low-power embedded
                 reconfigurable system is at least 15 times faster than
                 the software implementation on a low-voltage high-end
                 CPU, while consuming at least 60 times less energy. Our
                 novel system is also 88 times more energy efficient
                 than the recently introduced low-power multi-core Intel
                 devices which are optimized for embedded systems. This
                 is, to the best of our knowledge, the first system
                 presented that can execute the complete complex object
                 recognition task at a multi frame per second rate while
                 consuming minimal amounts of energy, making it an ideal
                 candidate for future embedded multimedia systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhai:2013:MSA,
  author =       "Jiali Teddy Zhai and Hristo Nikolov and Todor
                 Stefanov",
  title =        "Mapping of streaming applications considering
                 alternative application specifications",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "34:1--34:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435230",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Streaming applications often require a parallel Model
                 of Computation (MoC) to specify their application
                 behavior and to facilitate mapping onto Multi-Processor
                 System-on-Chip (MPSoC) platforms. Various performance
                 requirements and resource budgets of embedded systems
                 ask for an efficient design space exploration (DSE)
                 approach to select the best design from a design space
                 consisting of a large number of design choices.
                 However, existing DSE approaches explore the design
                 space that includes only architecture and mapping
                 alternatives for an initial application specification
                 given by the application designer. In this article, we
                 first show that a design often might not be optimal if
                 alternative specifications of a given application are
                 not taken into account. We further argue that the best
                 alternative specification consists of only independent
                 and load-balanced application tasks. Based on the
                 Polyhedral Process Network (PPN) MoC, we present an
                 approach to analyze and transform an initial PPN to an
                 alternative one that contains only independent
                 processes if possible. Finally, by prototyping
                 real-life applications on both FPGA-based MPSoCs and
                 desktop multi-core platforms, we demonstrate that
                 mapping the alternative application specification
                 results in a large performance gain compared to those
                 approaches, in which alternative application
                 specifications are not taken into account.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Geuns:2013:SST,
  author =       "Stefan J. Geuns and Joost P. H. M. Hausmans and Marco
                 J. G. Bekooij",
  title =        "Sequential specification of time-aware stream
                 processing applications",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "35:1--35:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435231",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Automatic parallelization of Nested Loop Programs
                 (NLPs) is an attractive method to create embedded
                 real-time stream processing applications for multi-core
                 systems. However, the description and parallelization
                 of applications with a time dependent functional
                 behavior has not been considered in NLPs. In such a
                 description, semantic information about time dependent
                 behavior must be made available for the compiler, such
                 that an optimized time independent implementation can
                 be generated automatically. This article introduces
                 language constructs with temporal semantics to NLPs.
                 Using these language constructs, time dependent
                 applications can be specified and a corresponding
                 data-driven implementation can be generated for use on
                 a multi-core system. Despite that these time-aware
                 language constructs can be data-dependent, the
                 application remains functionally deterministic.
                 Pipelining is exploited to increase the throughput of
                 an application. The media access control (MAC) protocol
                 of an IEEE 802.11p WLAN transceiver is used to
                 illustrate the relevance and applicability of the
                 introduced concepts.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2013:LAB,
  author =       "Daeyoung Lee and Hyunok Oh",
  title =        "A lifetime aware buffer assignment method for
                 streaming applications on {DRAM\slash PRAM} hybrid
                 memory",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "36:1--36:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435232",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article proposes a lifetime aware buffer
                 assignment method for streaming applications like
                 multimedia specified in a synchronous dataflow (SDF)
                 graph on a DRAM/PRAM hybrid memory in which the
                 endurance of PRAM is limited. We determine whether
                 buffers are assigned to DRAM or PRAM to minimize the
                 writing frequency of PRAM. To solve the problems, we
                 formulate them using Answer Set Programming.
                 Experimental results show that the proposed approach
                 increases the PRAM lifetime by 63\% compared with no
                 optimization, and shows the tradeoff between PRAM and
                 DRAM size to guarantee a lifetime constraint.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chung:2013:EUE,
  author =       "Yi-Fan Chung and Yin-Tsung Lo and Chung-Ta King",
  title =        "Enhancing user experiences by exploiting energy and
                 launch delay trade-off of mobile multimedia
                 applications",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "37:1--37:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435233",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Launch delay has been an important factor affecting
                 users' experiences in mobile multimedia applications.
                 To launch applications quickly, modern mobile systems
                 such as Android usually keep inactive applications in
                 the background and manage them through an LRU-based
                 activity stack. Whenever the user wants to run and
                 interact with a background application again, that
                 application can be switched back into the foreground
                 quickly from the activity stack without delay in
                 initializing the applications anew. Since background
                 multimedia applications often continuously consume the
                 battery power of the smart phone, the challenge is to
                 effect a balance between application launch delay and
                 battery lifetime. In this article, we propose
                 innovative application management strategies that
                 terminate ``unbeneficial'' background applications to
                 save energy and pre-launch ``beneficial'' applications
                 to improve the application launch delay. The proposed
                 strategies are evaluated through a trace-driven
                 simulation and a real experiment. The results show that
                 the average application launch delay can be reduced by
                 15\% while the average battery lifetime is increased by
                 18\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{DeSutter:2013:ISS,
  author =       "Bjorn {De Sutter} and Jan Vitek",
  title =        "Introduction to the special section on {LCTES'11}",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "38:1--38:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435234",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Berthier:2013:SPD,
  author =       "Nicolas Berthier and Florence Maraninchi and Laurent
                 Mounier",
  title =        "Synchronous programming of device drivers for global
                 resource control in embedded operating systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "39:1--39:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435235",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In embedded systems, controlling a shared resource
                 like a bus, or improving a property like power
                 consumption, may be hard to achieve when programming
                 device drivers individually. In this article, we
                 propose a global resource control approach, based on a
                 centralized view of the devices' states. The solution
                 we propose operates on the hardware/software interface.
                 It involves a simple adaptation of the application
                 level, to communicate with the hardware via a control
                 layer. The control layer itself is built from a set of
                 simple automata: the device drivers, whose states
                 correspond to functional or power consumption modes,
                 and a controller to enforce global properties. All
                 these automata are programmed using a synchronous
                 language, and compiled into a single piece of C code.
                 We take as example the node of a sensor network. We
                 explain the approach in details, demonstrate its use
                 and benefits with an event-driven or multithreading
                 operating system, and draw guidelines for its use in
                 other contexts.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cullmann:2013:CPA,
  author =       "Christoph Cullmann",
  title =        "Cache persistence analysis: Theory and practice",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "40:1--40:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435236",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "To compute a worst-case execution time (WCET) estimate
                 for a program, the architectural effects of the
                 underlying hardware must be modeled. For modern
                 processors this results in the need for a cache and
                 pipeline analysis. The timing-relevant result of the
                 cache analysis is the categorization of the accesses to
                 cached memory. Categorizations that are obtainable by
                 the well-known must and may cache analysis [Ferdinand
                 1997] are always-hit, always-miss and not-classified.
                 The cache persistence analysis tries to provide
                 additional information for the not-classified case to
                 limit the number of misses. There exists a cache
                 persistence analysis by Ferdinand and Wilhelm based on
                 abstract interpretation computing these
                 classifications. In this article, we present a
                 correctness issue with this analysis. To fix this
                 issue, we propose two new abstract interpretation based
                 persistence analyses and show their safety. One is
                 based on the known may analysis and a second one on the
                 concept of conflict counting. For fully timing
                 compositional architectures [Wilhelm et al. 2009] the
                 persistence information is straightforward to use. We
                 will apply the concepts of persistence analysis for the
                 first time to state-of-the-art architectures that
                 exhibit both timing anomalies and domino effects. Such
                 architectures do not allow the analyzer to quantify the
                 costs of a single cache hit or miss in isolation. To
                 make the usage of the persistence information feasible,
                 we integrate the presented novel persistence analyses
                 together with a novel path analysis approach into the
                 industrially used WCET analyzer aiT.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sifakis:2013:ISS,
  author =       "Joseph Sifakis and Lothar Thiele and Reinhard
                 Wilhelm",
  title =        "Introduction to the special section on rigorous
                 embedded systems design",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "41:1--41:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435237",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Reineke:2013:SCR,
  author =       "Jan Reineke and Daniel Grund",
  title =        "Sensitivity of cache replacement policies",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "42:1--42:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435238",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The sensitivity of a cache replacement policy
                 expresses to what extent the execution history may
                 influence the number of cache hits and misses during
                 program execution. We present an algorithm to compute
                 the sensitivity of a replacement policy. We have
                 implemented this algorithm in a tool called Relacs
                 that can handle a large class of replacement policies
                 including LRU, FIFO, PLRU, and MRU. Sensitivity
                 properties obtained with Relacs demonstrate that the
                 execution history can have a strong impact on the
                 number of cache hits and misses if FIFO, PLRU, or MRU
                 is used. A simple model of execution time is used to
                 evaluate the impact of cache sensitivity on measured
                 execution times. The model shows that measured
                 execution times may strongly underestimate the
                 worst-case execution time for FIFO, PLRU, and MRU.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jeong:2013:RRM,
  author =       "Jinkyu Jeong and Hwanju Kim and Jeaho Hwang and
                 Joonwon Lee and Seungryoul Maeng",
  title =        "Rigorous rental memory management for embedded
                 systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "43:1--43:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435239",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Memory reservation in embedded systems is a prevalent
                 approach to provide a physically contiguous memory
                 region to its integrated devices, such as a camera
                 device and a video decoder. Inefficiency of the memory
                 reservation becomes a more significant problem in
                 emerging embedded systems, such as smartphones and
                 smart TVs. Many ways of using these systems increase
                 the idle time of their integrated devices, and
                 eventually decrease the utilization of their reserved
                 memory. In this article, we propose a scheme to
                 minimize the memory inefficiency caused by the memory
                 reservation. The memory space reserved for a device can
                 be rented for other purposes when the device is not
                 active. For this scheme to be viable, latencies
                 associated with reallocating the memory space should be
                 minimal. Volatile pages are good candidates for such
                 page reallocation since they can be reclaimed
                 immediately as they are needed by the original device.
                 We also provide two optimization techniques,
                 lazy-migration and adaptive-activation. The former
                 increases the lowered utilization of the rental memory
                 by our volatile page allocations, and the latter saves
                 active pages in the rental memory during the
                 reallocation. We implemented our scheme on a smartphone
                 development board with the Android Linux kernel. Our
                 prototype has shown that the time for the return
                 operation is less than 0.77 seconds in the tested
                 cases. We believe that this time is acceptable to
                 end-users in terms of transparency since the time can
                 be hidden in application initialization time. The
                 rental memory also brings throughput increases ranging
                 from 2\% to 200\% based on the available memory and the
                 applications' memory intensiveness.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vasilikos:2013:HSA,
  author =       "Vasileios Vasilikos and Georgios Smaragdos and
                 Christos Strydis and Ioannis Sourdis",
  title =        "Heuristic search for adaptive, defect-tolerant
                 multiprocessor arrays",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "44:1--44:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435240",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, new heuristic-search methods and
                 algorithms are presented for enabling highly efficient
                 and adaptive, defect-tolerant multiprocessor arrays. We
                 consider systems where a homogeneous multiprocessor
                 array lies on top of reconfigurable interconnects which
                 allow the pipeline stages of the processors to be
                 connected in all possible configurations. Considering
                 the multiprocessor array partitioned in substitutable
                 units at the granularity of pipeline stages, we employ
                 a variety of heuristic-search methods and algorithms to
                 isolate and replace defective units. The proposed
                 heuristics are designed for off-line execution and aim
                 at minimizing the performance overhead necessarily
                 introduced to the array by the interconnects' latency.
                 An empirical evaluation of the designed algorithms is
                 then carried out, in order to assess the targeted
                 problem and the efficacy of our approach. Our findings
                 indicate this to be an NP-complete computational
                 problem, however, our heuristic-search methods can
                 achieve, for the problem sizes we exhaustively
                 searched, 100\% accuracy in finding the optimal
                 solution among 10$^{19}$ possible candidates within 2.5
                 seconds. Alternatively, they can provide near-optimal
                 solutions at an accuracy which consistently exceeds
                 70\% (compared to the optimal solution) in only
                 10$^{-4}$ seconds.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Marinescu:2013:FSJ,
  author =       "Maria-Cristina Marinescu and C{\'e}sar S{\'a}nchez",
  title =        "Fusing statecharts and {Java}",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "45:1--45:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435241",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents FUSE, an approach for modeling
                 and implementing embedded software components which
                 starts from a main-stream programming language and
                 brings some of the key concepts of Statecharts as
                 first-class elements within this language. Our approach
                 provides a unified programming environment which not
                 only preserves some of the advantages of Statecharts'
                 formal foundation but also directly supports features
                 of object-orientation and strong typing. By specifying
                 Statecharts directly in FUSE we eliminate the
                 out-of-synch between the model and the generated code
                 and we allow the tuning and debugging to be done within
                 the same programming model. This article describes the
                 main language constructs of FUSE and presents its
                 semantics by translation into the Java programming
                 language. We conclude by discussing extensions to the
                 base language which enable the efficient static
                 checking of program properties.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hubner:2013:ISS,
  author =       "Michael H{\"u}bner",
  title =        "Introduction to the special section on multiprocessor
                 system-on-chip for cyber-physical systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "46:1--46:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435242",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Paulin:2013:PPP,
  author =       "Pierre G. Paulin and Ali Erdem {\"O}zcan and Vincent
                 Gagn{\'e} and Bruno Lavigueur and Olivier Benny",
  title =        "Parallel programming patterns for multi-processor
                 {SoC}: Application to video processing",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "47:1--47:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435243",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Efficient, scalable and productive parallel
                 programming is a major challenge for exploiting the
                 future multi-processor SoC platforms. This article
                 presents the MultiFlex programming environment which
                 has been developed to address this challenge. It is
                 targeted for use on Platform 2012, a scalable
                 multi-processor fabric. The MultiFlex environment
                 supports high-level simulation, iterative platform
                 mapping, and includes tools for programming model aware
                 debug, trace, visualization and analysis. This article
                 focuses on the two classes of programming abstractions
                 supported in MultiFlex. The first is a set of Parallel
                 Programming Patterns (PPP) which offer a rich set of
                 programming abstractions for implementing efficient
                 data- and task-level parallel applications. The second
                 is a Reactive Task Management (RTM) abstraction, which
                 offers a lightweight C-based API to support dynamic
                 dispatching of small grain tasks on tightly coupled
                 parallel processing resources. The use of the MultiFlex
                 native programming model is illustrated through the
                 capture and mapping of two representative video
                 applications. The first is a high-quality rescaling
                 (HQR) application on a multi-processor platform. We
                 present the details of the optimization process which
                 was required for mapping the HQR application, for which
                 the reference code requires 350 GIPS (giga instructions
                 per second), onto a 16 processor cluster. Our results
                 show that the parallel implementation using the PPP
                 model offers almost linear acceleration with respect to
                 the number of processing elements. The second
                 application is a high-definition VC-1 decoder. For this
                 application, we illustrate two different parallel
                 programming model variants, one using PPPs, the other
                 based on RTM. These two versions are mapped onto two
                 variants of a homogeneous version of the Platform 2012
                 multi-core fabric.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Thiele:2013:PTT,
  author =       "Lothar Thiele and Lars Schor and Iuliana Bacivarov and
                 Hoeseok Yang",
  title =        "Predictability for timing and temperature in
                 multiprocessor system-on-chip platforms",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "48:1--48:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435244",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "High computational performance in multiprocessor
                 system-on-chips (MPSoCs) is constrained by the
                 ever-increasing power densities in integrated circuits,
                 so that nowadays MPSoCs face various thermal issues.
                 For instance, high chip temperatures may lead to
                 long-term reliability concerns and short-term
                 functional errors. Therefore, the new challenge in
                 designing embedded real-time MPSoCs is to guarantee the
                 final performance and correct function of the system,
                 considering both functional and non-functional
                 properties. One way to achieve this is by ruling out
                 mapping alternatives that do not fulfill requirements
                 on performance or peak temperature already in early
                 design stages. In this article, we propose a
                 thermal-aware optimization framework for mapping
                 real-time applications onto MPSoC platforms. The
                 performance and temperature of mapping candidates are
                 evaluated by formal temporal and thermal analysis
                 models. To this end, analysis models are automatically
                 generated during design space exploration, based on the
                 same specifications as used for software synthesis. The
                 analysis models are automatically calibrated with
                 performance data reflecting the execution of the system
                 on the target platform. The data is automatically
                 obtained prior to design space exploration based on a
                 set of benchmark mappings. Case studies show that the
                 performance and temperature requirements are often
                 conflicting goals and optimizing them together leads to
                 major benefits in terms of a guaranteed and predictable
                 high performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Davare:2013:MDE,
  author =       "Abhijit Davare and Douglas Densmore and Liangpeng Guo
                 and Roberto Passerone and Alberto L.
                 Sangiovanni-Vincentelli and Alena Simalatsar and Qi
                 Zhu",
  title =        "{metroII}: a design environment for cyber-physical
                 systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "49:1--49:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435245",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cyber-Physical Systems are integrations of computation
                 and physical processes and as such, will be
                 increasingly relevant to industry and people. The
                 complexity of designing CPS resides in their
                 heterogeneity. Heterogeneity manifests itself in
                 modeling their functionality as well as in the
                 implementation platforms that include a multiplicity of
                 components such as microprocessors, signal processors,
                 peripherals, memories, sensors and actuators often
                 integrated on a single chip or on a small package such
                 as a multi-chip module. We need a methodology, tools
                 and environments where heterogeneity can be dealt with
                 at all levels of abstraction and where different tools
                 can be integrated. We present here Platform-Based
                 Design as the CPS methodology of choice and metroII, a
                 design environment that supports it. We present the
                 metamodeling approach followed in metroII, how to
                 couple the functionality and implementation platforms
                 of CPS, and the simulation technology that supports the
                 analysis of CPS and of their implementation. We also
                 present examples of use and the integration of metroII
                 with another popular design environment developed at
                 Verimag, BIP.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bogdan:2013:PCH,
  author =       "Paul Bogdan and Siddharth Jain and Radu Marculescu",
  title =        "Pacemaker control of heart rate variability: a cyber
                 physical system perspective",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "50:1--50:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435246",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cardiac diseases, like those related to abnormal heart
                 rate activity, have an enormous economic and
                 psychological impact worldwide. The approaches used to
                 control the behavior of modern pacemakers ignore the
                 fractal nature of heart rate activity. The purpose of
                 this article is to present a Cyber Physical System
                 approach to pacemaker design that exploits precisely
                 the fractal properties of heart rate activity in order
                 to design the pacemaker controller. Towards this end,
                 we solve a finite horizon optimal control problem based
                 on the heartbeat time series and show that this control
                 problem can be converted into a system of linear
                 equations. We also compare and contrast the performance
                 of the fractal optimal control problem under six
                 different cost functions. Finally, to get an idea of
                 hardware complexity, we implement the fractal optimal
                 controller on a Virtex4 FPGA and report some
                 preliminary results in terms of area overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gohringer:2013:RAN,
  author =       "Diana G{\"o}hringer and Lukas Meder and Oliver Oey and
                 J{\"u}rgen Becker",
  title =        "Reliable and adaptive network-on-chip architectures
                 for cyber physical systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "51:1--51:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435227.2435247",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Reliability in embedded systems is crucial for many
                 application domains. Especially, for safety critical
                 application, as they can be found in the automotive and
                 avionic domain, a high reliability has to be ensured.
                 The technology in chip production undergoes a steady
                 shrinking process from nowadays 25 nanometers. It is
                 proven that coming technologies, which are much
                 smaller, can have a higher defect rate after
                 production, but also at runtime. The physical effects
                 at runtime come from a higher susceptibility for
                 radiation. Since the silicon die of a field
                 programmable gate array (FPGA) includes a high amount
                 of physical wiring, the radiation effect plays here a
                 major role. Therefore, this article describes an
                 approach of a reliable Network-on-Chip (NoC) which can
                 be used for an FPGA-based system. The article describes
                 the concept and the physical realization of this NoC
                 and evaluates its reliability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2013:SIE,
  author =       "Jongsung Kim and Javier A. Barria and Morris Chang and
                 Victor C. M. Leung",
  title =        "Special issue on embedded systems for interactive
                 multimedia services {(ES-IMS)}",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "19:1--19:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423637",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2013:ELS,
  author =       "Yeong-Sheng Chen and Yun-Ju Ting and Chih-Heng Ke and
                 Naveen Chilamkurti and Jong Hyuk Park",
  title =        "Efficient localization scheme with ring overlapping by
                 utilizing mobile anchors in wireless sensor networks",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "20:1--20:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423638",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This study proposes an efficient localization scheme
                 in wireless sensor networks. The proposed scheme
                 utilizes mobile anchors and is based on ring
                 overlapping. In a wireless sensor network, the nodes
                 that know their locations are called reference nodes,
                 and the other nodes that are without the knowledge of
                 their locations are called blind nodes. To localize a
                 certain blind node, by comparing the relative RSSI
                 (Received Signal Strength Indicator) values among
                 nodes, mobile beacons are utilized to find out the
                 rings that are centered at a reference node and contain
                 the blind node. These rings are called B-Rings. Since
                 the mobile anchors and the reference nodes know their
                 own locations, the B-Rings can be precisely derived.
                 Moreover, by using multiple mobile beacons, the widths
                 of the B-Rings can be further minimized; and then by
                 overlapping them, the location of the blind nodes can
                 be efficiently estimated. Most existing localization
                 schemes that utilize mobile anchors let the mobile
                 anchors move randomly. In contrast, the proposed scheme
                 provides regular and simple movement mechanisms for the
                 mobile anchors. Thus, the mobile anchors consume less
                 energy than the other schemes, in which the mobile
                 anchors move randomly. Analytical analysis and
                 simulation results show that the proposed localization
                 mechanism can achieve better location accuracy as well
                 as less movement length of the mobile anchor than the
                 other existing related approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sun:2013:DEI,
  author =       "Hung-Min Sun and Chi-Yao Weng and Shiuh-Jeng Wang and
                 Cheng-Hsing Yang",
  title =        "Data embedding in image-media using weight-function on
                 modulo operations",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "21:1--21:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423639",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multimedia hiding system is to embed message behind
                 the specified media, but it is still kept normal in
                 media representations via human sensitive organizations
                 without causing imperceptibility. In this article, we
                 propose a data hiding system by means of flexible
                 exploiting modification directions to achieve safer
                 message concealments in image-media. In our scheme, $n$
                 cover-pixels are flexibly chosen on modulo operations
                 to embed a secret $s$, where $ n = \lceil \log_3 (s)
                 \rceil $. The varied pixel values associated with the
                 chosen $n$ pixels are only changed among $ [ - 1, 1] $.
                 Because the numbers of adjustable pixels are much
                 greater than the pixels in the past scheme, our scheme
                 is able to obtain a higher embedded ratio in response
                 to the capacity requirements of information hiding
                 systems. In addition, we also applied the
                 statistics-steganalyzers to demonstrate that our scheme
                 has accomplishment not only higher capacity but also
                 kept the robustness against the blind steganalyzers.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seo:2013:AIG,
  author =       "Sanghyun Seo and Seungtaek Ryoo and Kyunghyun Yoon",
  title =        "Artistic image generation for emerging multimedia
                 services by impressionist manner",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "22:1--22:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423640",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we propose the rendering framework
                 for painting-like image generation and general system
                 architecture for mobile device. Especially, we focused
                 on a color division method for generating
                 neo-impressionist images. The French painter, George
                 Seurat, introduced pointillism under the theory that
                 the individual pigments of colors on the canvas are
                 reconstructed on the human retina. Pointillism is a
                 painting technique in which many small brush strokes
                 are combined to form a picture and determines the color
                 of brush strokes based on the optical mixing of
                 juxtaposed colors. In order to express countless
                 separate dots, we form hierarchical points using Wang
                 Tiles contained points. Also palette will be
                 constructed using neo-impressionist colors. Based on
                 this palette, we propose color division algorithm that
                 distributes hierarchical point's color to pointillist
                 colors using probability function. Finally,
                 hierarchical points set that applied proposed color
                 division rule is converted into brush strokes that
                 possesses properties such as shape and direction. This
                 rendering algorithm is performed in our proposed
                 system. Our scheme is able to produce a painting with
                 artistic style and be applied to the various platform
                 having the different computing performance and display
                 resolution. This system also can be extended to various
                 imaging devices (IPTV, camera, smart phone, digital
                 photo frame and so on).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Park:2013:EEN,
  author =       "Sang Oh Park and Sung Jo Kim",
  title =        "{ENFFiS}: an enhanced {NAND} flash memory file system
                 for mobile embedded multimedia system",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "23:1--23:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423641",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Since the typical erase cycle limit of a NAND flash
                 memory's block is about 1,000,000, flash memory should
                 be erased as evenly as possible; otherwise, file system
                 hot spots will soon be worn out. This forces a NAND
                 flash memory file system to scan the whole flash memory
                 during its mount rather than saving frequently updated
                 file system information in a fixed area. Since the
                 mount time linearly increases with the size of NAND
                 flash memory, boot times of embedded systems are also
                 linearly increased. In addition, since data loss may
                 occur if a file system terminates abnormally due to
                 unexpected errors, a stability scheme for NAND flash
                 memory file system is in great demand. To resolve these
                 problems, this article suggests an extended logical
                 block called Exblock (Extended Block) and a table
                 called SNode (Snapshot Node) to reduce the mount time
                 and proposes a new journaling scheme to improve
                 stability for an enhanced file system for NAND flash
                 memory storage called ENFFiS (Enhanced NAND Flash
                 memory File System). It also proposes a new cache
                 policy to improve read/write performances. ENFFiS shows
                 better performance than existing file systems in terms
                 of reading, writing, mount time and stability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2013:TAT,
  author =       "Jiayin Li and Meikang Qiu and Jian-Wei Niu and
                 Laurence T. Yang and Yongxin Zhu and Zhong Ming",
  title =        "Thermal-aware task scheduling in {$3$D} chip
                 multiprocessor with real-time constrained workloads",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "24:1--24:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423642",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Chip multiprocessor (CMP) techniques have been
                 implemented in embedded systems due to tremendous
                 computation requirements. Three-dimension (3D) CMP
                 architecture has been studied recently for integrating
                 more functionalities and providing higher performance.
                 The high temperature on chip is a critical issue for
                 the 3D architecture. In this article, we propose an
                 online thermal prediction model for 3D chips. Using
                 this model, we propose novel task scheduling algorithms
                 based on rotation scheduling to reduce the peak
                 temperature on chip. We consider data dependencies,
                 especially inter-iteration dependencies that are not
                 well considered in most of the current thermal-aware
                 task scheduling algorithms. Our simulation results show
                 that our algorithms can efficiently reduce the peak
                 temperature up to 8.1$^\circ$C.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Paul:2013:VSI,
  author =       "Anand Paul and Bo-Wei Chen and Karunanithi
                 Bharanitharan and Jhing-Fa Wang",
  title =        "Video search and indexing with reinforcement agent for
                 interactive multimedia services",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "25:1--25:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423643",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this study, we present a video search and indexing
                 system based on the state support vector (SVM) network,
                 video graph, and reinforcement agent for recognizing
                 and organizing video events. In order to enhance the
                 recognition performance of the state SVM network, two
                 innovative techniques are presented: state transition
                 correction and transition quality estimation. The
                 classification results are also merged into the video
                 indexing graph, which facilitates the search speed. A
                 reinforcement algorithm with an efficient scheduling
                 scheme significantly reduces both the power consumption
                 and time. The experimental results show the proposed
                 state SVM network was able to achieve a precision rate
                 as high as 83.83\% and the query results of the
                 indexing graph reached 80\% accuracy. The experiments
                 also demonstrate the performance and feasibility of our
                 system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nam:2013:PAR,
  author =       "Yunyoung Nam and Seungmin Rho and Chulung Lee",
  title =        "Physical activity recognition using multiple sensors
                 embedded in a wearable device",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "26:1--26:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423644",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present a wearable intelligence
                 device for activity monitoring applications. We
                 developed and evaluated algorithms to recognize
                 physical activities from data acquired using a 3-axis
                 accelerometer with a single camera worn on a body. The
                 recognition process is performed in two steps: at first
                 the features for defining a human activity are measured
                 by the 3-axis accelerometer sensor and the image sensor
                 embedded in a wearable device. Then, the physical
                 activity corresponding to the measured features is
                 determined by applying the SVM classifier. The 3-axis
                 accelerometer sensor computes the correlation between
                 axes and the magnitude of the FFT for other features of
                 an activity. Acceleration data is classified into nine
                 activity labels. Through the image sensor, multiple
                 optical flow vectors computed on each grid image patch
                 are extracted as features for defining an activity. In
                 the experiments, we showed that an overall accuracy
                 rate of activity recognition based our method was
                 92.78\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lim:2013:DRS,
  author =       "Seung-Ho Lim and Min Choi and Young Sik Jeong",
  title =        "Data reorganization for scalable video service with
                 embedded mobile devices",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "27:1--27:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423645",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent development of high-speed wireless networks and
                 embedded systems has enabled the recording and delivery
                 of high-performance multimedia to heterogeneous mobile
                 users. To support heterogeneous mobile users with
                 high-quality multimedia services, scalable video coding
                 was introduced. In the scalable video coding (SVC),
                 through multidimensional scalability, all types of
                 these scalability can be exploited at the same time.
                 However, the generated video sequences of scalable
                 video coding are not adequate for mobile multimedia
                 service systems since its flexibility makes non
                 contiguous storing and retrieval of partial stream
                 data. In this article, we propose efficient scalable
                 video data reorganization for video servicing systems,
                 which consist of video server and mobile clients. For
                 video server, we reorganize scalable video streams
                 taking into account both of decoding dependency and
                 location in disk array storage, where disk array is
                 widely used for storage systems of video server. In the
                 mobile devices, we place substreams with the
                 consideration of NAND flash memory page and block
                 boundaries, which is storage for mobile devices. The
                 experimental results show that the proposed
                 reorganization of scalable video can improve the
                 performance of mobile multimedia service systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kang:2013:AEC,
  author =       "Hyeong-Ju Kang and Heesuk Seo and Jin Kwak",
  title =        "Area-efficient convolutional deinterleaver for mobile
                 {TV} receiver",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "28:1--28:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423646",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, a single-pointer structure is
                 proposed for the convolutional deinterleavers of mobile
                 TV receivers. To enhance the burst-error correcting
                 capability, the convolutional interleaving and
                 deinterleaving scheme is widely used in mobile TV
                 receivers. However, a convolutional deinterleaver
                 requires many pointer registers. This article
                 introduces a single-pointer structure that reduces the
                 number of pointer registers. Experimental results show
                 that the single-pointer structure reduces the area of
                 the convolutional deinterleaver by 70\% in a mobile TV
                 receiver.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bharanitharan:2013:DMS,
  author =       "K. Bharanitharan and Jiun-Ren Ding and Anand Paul and
                 Kuen-Ming Lee and Ting-Wei Hou",
  title =        "Dependable management system for ubiquitous camera
                 array service in an elder-care center",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "29:1--29:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423647",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The concept of smart homes (SH) has been extensively
                 popularized, and there are a lot of technologies that
                 need to be continuously utilized and integrated in such
                 a concept. In this article, some applied problems of
                 camera array (CA) in the SH are discussed and solved.
                 Determining how to build an effective management method
                 for CA in order to ensure that user privacy is not
                 encroached upon is an important issue. In SH, the
                 applications of CA are very diversified. We suggest
                 that a satisfactory management method of CA should be
                 based on the open service gateway initiative (OSGi)
                 that includes resource management and monitoring (RMM)
                 and UPnP security for the problems of resources and
                 privacy, respectively. Finally, an applied example of
                 CA is addressed in an elder-care center (EC).
                 Simulation results show that the management strategy
                 and application of CA based on an OSGi is
                 satisfactory.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lai:2013:RBR,
  author =       "Chin-Feng Lai and Min Chen and Meikang Qiu and
                 Athanasios V. Vasilakos and Jong Hyuk Park",
  title =        "A {RF4CE}-based remote controller with interactive
                 graphical user interface applied to home automation
                 system",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "30:1--30:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423648",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the increase in commercial electronic equipment
                 and its complicated control interfaces, how to design
                 an effective and user-friendly control interface has
                 become a topic for many researchers. This research
                 introduces two-directional communication of an
                 interactive graphical user interface on a universal
                 remote control (URC). It is different from current URCs
                 where users must often spend huge amounts of time
                 setting the command codes and encoding each device.
                 With the increase in the number of appliances that the
                 controller needs to manage and the complicated and
                 numerous control buttons, using such controllers often
                 causes difficulties for users. This research employs a
                 cross-platform with integration theories, so when a
                 user wants to connect an appliance, both the appliance
                 end and the controller end will build a two-directional
                 connection through pairing over Radio Frequency for
                 Consumer Electronics (RF4CE). After connection, the
                 system will automatically set the communication
                 protocol between the controller and the device. The
                 appliance will automatically transmit its current state
                 and service in the form of bundles to the controller,
                 then the controller will project it onto an LCD screen.
                 The controller can also show the number of appliances
                 connected to the current position of the user, allowing
                 the user to use one controller to control all home
                 appliances with ease, achieving a simplified and
                 instinctive control interface to build the integrated
                 control environment for commercial appliances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Waluyo:2013:MQS,
  author =       "Agustinus Borgy Waluyo and David Taniar and Bala
                 Srinivasan and Wenny Rahayu",
  title =        "Mobile query services in a participatory embedded
                 sensing environment",
  journal =      j-TECS,
  volume =       "12",
  number =       "2",
  pages =        "31:1--31:??",
  month =        feb,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2423636.2423649",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 28 06:57:27 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A participatory mobile sensing system is designed to
                 enable clients to voluntarily collect environmental
                 data using embedded sensors and a mobile device while
                 going about their daily activities. Due to the
                 spatio-temporal nature of the data, and the significant
                 benefits of the data to the general public, it is
                 necessary to employ an efficient and effective query
                 processing model for the mobile clients to access the
                 data that can be visualized via an interactive
                 multimedia interface. This article introduces a unified
                 on-demand and data broadcast model to serve queries in
                 the context of a mobile sensing system. The
                 contributions of this article include the following:
                 (i) it presents a novel data structure and indexing
                 method to support the system; (ii) it provides
                 flexibility for the client to issue query using
                 on-demand or broadcast channel according to the server
                 load and broadcast schedule; (iii) it enables new data
                 access and processing for the mobile client; and (iv)
                 it is designed for a multiple channels/receivers
                 environment in a 4G wireless network. The proposed
                 model uses a holistic query processing approach for the
                 mobile sensing system that offers substantial
                 efficiency and autonomy for mobile clients when
                 retrieving data. The results of the experiments
                 undertaken affirm the effectiveness of its
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kirsch:2013:ISS,
  author =       "Christoph Kirsch and Vincent Mooney",
  title =        "Introduction to Special Section on Probabilistic
                 Embedded Computing",
  journal =      j-TECS,
  volume =       "12",
  number =       "2s",
  pages =        "86:1--86:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2465787.2465788",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 6 06:53:32 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "86",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Palem:2013:TYB,
  author =       "Krishna Palem and Avinash Lingamneni",
  title =        "Ten Years of Building Broken Chips: The Physics and
                 Engineering of Inexact Computing",
  journal =      j-TECS,
  volume =       "12",
  number =       "2s",
  pages =        "87:1--87:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2465787.2465789",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 6 06:53:32 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Well over a decade ago, many believed that an engine
                 of growth driving the semiconductor and computing
                 industries---captured nicely by Gordon Moore's
                 remarkable prophecy (Moore's law)---was speeding
                 towards a dangerous cliff-edge. Ranging from
                 expressions of concern to doomsday scenarios, the exact
                 time when serious hurdles would beset us varied quite a
                 bit---some of the more optimistic warnings giving
                 Moore's law until. Needless to say, a lot of people
                 have spent time and effort with great success to find
                 ways for substantially extending the time when we would
                 encounter the dreaded cliff-edge, if not avoiding it
                 altogether. Faced with this issue, we started
                 approaching this in a decidedly different manner---one
                 which suggested falling off the metaphorical cliff as a
                 design choice, but in a controlled way. This resulted
                 in devices that could switch and produce bits that are
                 correct, namely of having the intended value, only with
                 a probabilistic guarantee. As a result, the results
                 could in fact be incorrect. Such devices and associated
                 circuits and computing structures are now broadly
                 referred to as inexact designs, circuits, and
                 architectures. In this article, we will crystallize the
                 essence of inexactness dating back to 2002 through two
                 key principles that we developed: (i) that of admitting
                 error in a design in return for resource savings, and
                 subsequently (ii) making resource investments in the
                 elements of a hardware platform proportional to the
                 value of information they compute. We will also give a
                 broad overview of a range of inexact designs and
                 hardware concepts that our group and other groups
                 around the world have been developing since, based on
                 these two principles. Despite not being
                 deterministically precise, inexact designs can be
                 significantly more efficient in the energy they
                 consume, their speed of execution, and their area
                 needs, which makes them attractive in application
                 contexts that are resilient to error. Significantly,
                 our development of inexactness will be contrasted
                 against the rich backdrop of traditional approaches
                 aimed at realizing reliable computing from unreliable
                 elements, starting with von Neumann's influential
                 lectures and further developed by Shannon--Weaver and
                 others.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "87",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Misailovic:2013:PSP,
  author =       {Sasa Misailovic and Deokhwan Kim and Martin Rinard},
  title =        {Parallelizing Sequential Programs with Statistical
                 Accuracy Tests},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  volume =       {12},
  number =       {2s},
  articleno =    {88},
  pages =        {88:1--88:??},
  month =        may,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {https://doi.org/10.1145/2465787.2465790},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  bibdate =      {Thu Jun 6 06:53:32 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract =     {We present QuickStep, a novel system for parallelizing
                 sequential programs. Unlike standard parallelizing
                 compilers (which are designed to preserve the semantics
                 of the original sequential computation), QuickStep is
                 instead designed to generate (potentially
                 nondeterministic) parallel programs that produce
                 acceptably accurate results acceptably often. The
                 freedom to generate parallel programs whose output may
                 differ (within statistical accuracy bounds) from the
                 output of the sequential program enables a dramatic
                 simplification of the compiler, a dramatic increase in
                 the range of applications that it can parallelize, and
                 a significant expansion in the range of parallel
                 programs that it can legally generate. Results from our
                 benchmark set of applications show that QuickStep can
                 automatically generate acceptably accurate and
                 efficient parallel programs---the automatically
                 generated parallel versions of five of our six
                 benchmark applications run between 5.0 and 7.8 times
                 faster on eight cores than the original sequential
                 versions. These applications and parallelizations
                 contain features (such as the use of modern
                 object-oriented programming constructs or desirable
                 parallelizations with infrequent but acceptable data
                 races) that place them inherently beyond the reach of
                 standard approaches.},
}

@Article{Sartori:2013:ETE,
  author =       {John Sartori and Rakesh Kumar},
  title =        {Exploiting Timing Error Resilience in Processor
                 Architecture},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  volume =       {12},
  number =       {2s},
  articleno =    {89},
  pages =        {89:1--89:??},
  month =        may,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {https://doi.org/10.1145/2465787.2465791},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  bibdate =      {Thu Jun 6 06:53:32 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract =     {Escalating variations in modern CMOS designs have
                 become a threat to Moore's law. In light of the
                 increasing costs of standard worst-case design
                 practices, timing speculation has become a popular
                 approach for dealing with static and dynamic
                 non-determinism and increasing yield. Timing
                 speculative architectures allow conservative guardbands
                 to be relaxed, increasing efficiency at the expense of
                 occasional errors, which are corrected or tolerated by
                 an error resilience mechanism. Previous work has
                 proposed circuit- or design-level optimizations that
                 manipulate the error rate behavior of a design to
                 increase the efficiency of timing speculation. In this
                 article, we investigate whether architectural
                 optimizations can also manipulate error rate behavior
                 to significantly increase the effectiveness of timing
                 speculation. To this end, we demonstrate how error rate
                 behavior indeed depends on processor architecture and
                 that architectural optimizations can be used to
                 manipulate the error rate behavior of a processor.
                 Using timing speculation-aware architectural
                 optimizations, we demonstrate enhanced overscaling and
                 up to 29\% additional energy savings for processors
                 that employ Razor-based timing speculation.},
}

@Article{Chippa:2013:MQV,
  author =       "Vinay K. Chippa and Kaushik Roy and Srimat T.
                 Chakradhar and Anand Raghunathan",
  title =        "Managing the Quality vs.\ Efficiency Trade-off Using
                 Dynamic Effort Scaling",
  journal =      j-TECS,
  volume =       "12",
  number =       "2s",
  pages =        "90:1--90:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2465787.2465792",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 6 06:53:32 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Several current and emerging applications do not have
                 a unique result for a given input; rather, functional
                 correctness is defined in terms of output quality.
                 Recently proposed design techniques exploit the
                 inherent resilience of such applications and achieve
                 improved efficiency (energy or performance) by
                 foregoing correct execution of all the constituent
                 computations. Hardware and software systems that are
                 thus designed may be viewed as scalable effort systems,
                 since they offer the capability to modulate the effort
                 that they expend towards computation, thereby allowing
                 for trade-offs between output quality and efficiency.
                 We propose the concept of Dynamic Effort Scaling (DES),
                 which refers to dynamic management of the control knobs
                 that are exposed by scalable effort systems. We argue
                 the need for DES by observing that the degree of
                 resilience often varies significantly across
                 applications, across datasets, and even within a
                 dataset. We propose a general conceptual framework for
                 DES by formulating it as a feedback control problem,
                 wherein the scaling mechanisms are regulated with the
                 goal of maintaining output quality at or above a
                 specified limit. We present an implementation of
                 Dynamic Effort Scaling for recognition and mining
                 applications and evaluate it for the support vector
                 machines and K-means clustering algorithms under
                 various application scenarios and datasets. Our results
                 clearly demonstrate the benefits of the proposed
                 approach---statically setting the scaling mechanisms
                 leads to either significant error overshoot or
                 significant opportunities for energy savings left on
                 the table unexploited. In contrast, DES is able to
                 effectively regulate the output quality while maximally
                 exploiting the time-varying resiliency in the
                 workload.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "90",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Salajegheh:2013:HWS,
  author =       {Mastooreh Salajegheh and Yue Wang and Anxiao (Andrew)
                 Jiang and Erik Learned-Miller and Kevin Fu},
  title =        {Half-Wits: Software Techniques for Low-Voltage
                 Probabilistic Storage on Microcontrollers with {NOR}
                 Flash Memory},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  volume =       {12},
  number =       {2s},
  articleno =    {91},
  pages =        {91:1--91:??},
  month =        may,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {https://doi.org/10.1145/2465787.2465793},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  bibdate =      {Thu Jun 6 06:53:32 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract =     {This work analyzes the stochastic behavior of writing
                 to embedded flash memory at voltages lower than
                 recommended by a microcontroller's specifications in
                 order to reduce energy consumption. Flash memory
                 integrated within a microcontroller typically requires
                 the entire chip to operate on a common supply voltage
                 almost twice as much as what the CPU portion requires.
                 Our software approach allows the flash memory to
                 tolerate a lower supply voltage so that the CPU may
                 operate in a more energy-efficient manner.
                 Energy-efficient coding algorithms then cope with flash
                 memory writes that behave unpredictably. Our
                 software-only coding algorithms (in-place writes,
                 multiple-place writes, RS-Berger codes, and slow
                 writes) enable reliable storage at low voltages on
                 unmodified hardware by exploiting the electrically
                 cumulative nature of half-written data in write-once
                 bits. For a sensor monitoring application using the
                 MSP430, coding with in-place writes reduces the overall
                 energy consumption by 34\%. In-place writes are
                 competitive when the time spent on low-voltage
                 operations such as computation are at least four times
                 greater than the time spent on writes to flash memory.
                 Our evaluation shows that tightly maintaining the
                 digital abstraction for storage in embedded flash
                 memory comes at a significant cost to energy
                 consumption with minimal gain in reliability. We find
                 our techniques most effective for embedded workloads
                 that have significant duty cycling, rare writes, or
                 energy harvesting.},
}

@Article{Alaghi:2013:SSC,
  author =       {Armin Alaghi and John P. Hayes},
  title =        {Survey of Stochastic Computing},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  volume =       {12},
  number =       {2s},
  articleno =    {92},
  pages =        {92:1--92:??},
  month =        may,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {https://doi.org/10.1145/2465787.2465794},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  bibdate =      {Thu Jun 6 06:53:32 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract =     {Stochastic computing (SC) was proposed in the 1960s as
                 a low-cost alternative to conventional binary
                 computing. It is unique in that it represents and
                 processes information in the form of digitized
                 probabilities. SC employs very low-complexity
                 arithmetic units which was a primary design concern in
                 the past. Despite this advantage and also its inherent
                 error tolerance, SC was seen as impractical because of
                 very long computation times and relatively low
                 accuracy. However, current technology trends tend to
                 increase uncertainty in circuit behavior and imply a
                 need to better understand, and perhaps exploit,
                 probability in computation. This article surveys SC
                 from a modern perspective where the small size, error
                 resilience, and probabilistic features of SC may
                 compete successfully with conventional methodologies in
                 certain applications. First, we survey the literature
                 and review the key concepts of stochastic number
                 representation and circuit structure. We then describe
                 the design of SC-based circuits and evaluate their
                 advantages and disadvantages. Finally, we give examples
                 of the potential applications of SC and discuss some
                 practical problems that are yet to be solved.},
}

@Article{Lingamneni:2013:SPI,
  author =       "Avinash Lingamneni and Christian Enz and Krishna Palem
                 and Christian Piguet",
  title =        "Synthesizing Parsimonious Inexact Circuits through
                 Probabilistic Design Techniques",
  journal =      j-TECS,
  volume =       "12",
  number =       "2s",
  pages =        "93:1--93:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2465787.2465795",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 6 06:53:32 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The domain of inexact circuit design, in which
                 accuracy of the circuit can be exchanged for
                 substantial cost (energy, delay, and/or area) savings,
                 has been gathering increasing prominence of late owing
                 to a growing desire for reducing energy consumption of
                 the systems, particularly in the domain of embedded and
                 (portable) multimedia applications. Most of the
                 previous approaches to realizing inexact circuits
                 relied on scaling of circuit parameters (such as supply
                 voltage) taking advantage of an application's error
                 tolerance to achieve the cost and accuracy trade-offs,
                 thus suffering from acute drawbacks of considerable
                 implementation overheads that significantly reduced the
                 gains. In this article, two novel design approaches
                 called Probabilistic Pruning and Probabilistic Logic
                 Minimization are proposed to realize inexact circuits
                 with zero hardware overhead. Extensive simulations on
                 various architectures of critical datapath elements
                 demonstrate that each of the techniques can
                 independently achieve normalized gains as large as $ 2
                 \times $--$ 9.5 \times $ in energy-delay-area product
                 for relative error magnitude as low as $ 10^{-4} $--$
                 {10 - 8} $ \% compared to corresponding conventional
                 correct circuits.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "93",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cazorla:2013:PPA,
  author =       {Francisco J. Cazorla and Eduardo Qui{\~n}ones and
                 Tullio Vardanega and Liliana Cucu and Benoit Triquet
                 and Guillem Bernat and Emery Berger and Jaume Abella
                 and Franck Wartel and Michael Houston and Luca
                 Santinelli and Leonidas Kosmidis and Code Lo and Dorin
                 Maxim},
  title =        {{PROARTIS}: Probabilistically Analyzable Real-Time
                 Systems},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  volume =       {12},
  number =       {2s},
  articleno =    {94},
  pages =        {94:1--94:??},
  month =        may,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {https://doi.org/10.1145/2465787.2465796},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  bibdate =      {Thu Jun 6 06:53:32 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract =     {Static timing analysis is the state-of-the-art
                 practice of ascertaining the timing behavior of
                 current-generation real-time embedded systems. The
                 adoption of more complex hardware to respond to the
                 increasing demand for computing power in
                 next-generation systems exacerbates some of the
                 limitations of static timing analysis. In particular,
                 the effort of acquiring (1) detailed information on the
                 hardware to develop an accurate model of its execution
                 latency as well as (2) knowledge of the timing behavior
                 of the program in the presence of varying hardware
                 conditions, such as those dependent on the history of
                 previously executed instructions. We call these
                 problems the timing analysis walls. In this
                 vision-statement article, we present probabilistic
                 timing analysis, a novel approach to the analysis of
                 the timing behavior of next-generation real-time
                 embedded systems. We show how probabilistic timing
                 analysis attacks the timing analysis walls; we then
                 illustrate the mathematical foundations on which this
                 method is based and the challenges we face in the
                 effort of efficiently implementing it. We also present
                 experimental evidence that shows how probabilistic
                 timing analysis reduces the extent of knowledge about
                 the execution platform required to produce
                 probabilistically accurate WCET estimations.},
}

@Article{Abbas:2013:PTL,
  author =       {Houssam Abbas and Georgios Fainekos and Sriram
                 Sankaranarayanan and Franjo Ivanci{\'c} and Aarti
                 Gupta},
  title =        {Probabilistic Temporal Logic Falsification of
                 Cyber-Physical Systems},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  volume =       {12},
  number =       {2s},
  articleno =    {95},
  pages =        {95:1--95:??},
  month =        may,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {https://doi.org/10.1145/2465787.2465797},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  bibdate =      {Thu Jun 6 06:53:32 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract =     {We present a Monte-Carlo optimization technique for
                 finding system behaviors that falsify a metric temporal
                 logic (MTL) property. Our approach performs a random
                 walk over the space of system inputs guided by a
                 robustness metric defined by the MTL property.
                 Robustness is guiding the search for a falsifying
                 behavior by exploring trajectories with smaller
                 robustness values. The resulting testing framework can
                 be applied to a wide class of cyber-physical systems
                 (CPS). We show through experiments on complex system
                 models that using our framework can help automatically
                 falsify properties with more consistency as compared to
                 other means, such as uniform sampling.},
}

@Article{Forte:2013:ETA,
  author =       {Domenic Forte and Ankur Srivastava},
  title =        {Energy- and Thermal-Aware Video Coding via
                 Encoder\slash Decoder Workload Balancing},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  volume =       {12},
  number =       {2s},
  articleno =    {96},
  pages =        {96:1--96:??},
  month =        may,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {https://doi.org/10.1145/2465787.2465798},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  bibdate =      {Thu Jun 6 06:53:32 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract =     {Video coding and compression are essential components
                 of multimedia services but are known to be
                 computationally intensive and energy demanding.
                 Traditional video coding paradigms, predictive and
                 distributed video coding (PVC and DVC), result in
                 excessive computation at either the encoder (PVC) or
                 decoder (DVC). Several recent papers have proposed a
                 hybrid PVC/DVC codec which shares the video coding
                 workload between encoder and decoder. In this article,
                 we propose a controller for such hybrid coders that
                 considers energy and temperature to dynamically split
                 the coding workload of a system comprised of one
                 encoder and one decoder. We also present two heuristic
                 algorithms for determining safe operating temperatures
                 in the controller solution: (1) stable state thermal
                 modeling algorithm, which focuses on long term
                 temperatures, and (2) transient thermal modeling
                 algorithm, which is better for short-term thermal
                 behavior. Results show that the proposed algorithms
                 result in more balanced energy utilization, improve
                 overall system lifetime, and reduce operating
                 temperatures when compared to strictly PVC and DVC
                 systems.},
}

@Article{Uzelac:2013:HBL,
  author =       {Vladimir Uzelac and Aleksandar Milenkovi{\'c}},
  title =        {Hardware-Based Load Value Trace Filtering for
                 On-the-Fly Debugging},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  volume =       {12},
  number =       {2s},
  articleno =    {97},
  pages =        {97:1--97:??},
  month =        may,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {https://doi.org/10.1145/2465787.2465799},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  bibdate =      {Thu Jun 6 06:53:32 MDT 2013},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract =     {Capturing program and data traces during program
                 execution unobtrusively on-the-fly is crucial in
                 debugging and testing of cyber-physical systems.
                 However, tracing a complete program unobtrusively is
                 often cost-prohibitive, requiring large on-chip trace
                 buffers and wide trace ports. This article describes a
                 new hardware-based load data value filtering technique
                 called Cache First-access Tracking. Coupled with an
                 effective variable encoding scheme, this technique
                 achieves a significant reduction of load data value
                 traces, from 5.86 to 56.39 times depending on the data
                 cache size, thus enabling cost-effective, unobtrusive
                 on-the-fly tracing and debugging.},
}

@Article{Zhang:2013:SAE,
  author =       "Fengxiang Zhang and Alan Burns",
  title =        "Schedulability Analysis of {EDF}-Scheduled Embedded
                 Real-Time Systems with Resource Sharing",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "67:1--67:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Earliest Deadline First (EDF) is the most widely
                 studied optimal dynamic scheduling algorithm for
                 uniprocessor real-time systems. In the existing
                 literature, however, there is no complete exact
                 analysis for EDF scheduling when both resource sharing
                 and release jitter are considered. Since resource
                 sharing and release jitter are important
                 characteristics of embedded real-time systems, a solid
                 theoretical foundation should be provided for EDF
                 scheduled systems. In this paper, we extend traditional
                 processor demand analysis to let arbitrary deadline
                 real-time tasks share non-preemptable resources and
                 suffer release jitter. A complete and exact
                 schedulability analysis for EDF scheduled systems is
                 provided. This analysis is incorporated into QPA (Quick
                 Processor-demand Analysis) which provides an efficient
                 implementation of the exact test.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ben-Asher:2013:UMP,
  author =       "Yosi Ben-Asher and Nadav Rotem",
  title =        "Using Memory Profile Analysis for Automatic Synthesis
                 of Pointers Code",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "68:1--68:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "One of the main advantages of high-level synthesis
                 (HLS) is the ability to synthesize circuits that can
                 access multiple memory banks in parallel. Current HLS
                 systems synthesize parallel memory references based on
                 explicit array declarations in the source code. We
                 consider the need to synthesize not only array
                 references but also memory operations targeting
                 pointers and dynamic data structures. This paper
                 describes Automatic Memory Partitioning, a method for
                 automatically synthesizing general data structures
                 (arrays and pointers) into multiple memory banks for
                 increased parallelism and performance. We use source
                 code instrumentation to collect memory traces in order
                 to detect linear memory access patterns. The memory
                 traces are used to split data structures into disjoint
                 memory regions and determine which segments may benefit
                 from parallel memory access. We present an algorithm
                 for allocating memory segments into multiple memory
                 banks. Experiments show significant improvements in
                 performance while conserving the number of memory
                 banks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2013:RAB,
  author =       "Fumin Zhang and Zhenwu Shi and Shayok Mukhopadhyay",
  title =        "Robustness Analysis for Battery-Supported
                 Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "69:1--69:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article establishes a novel analytical approach
                 to quantify robustness of scheduling and battery
                 management for battery supported cyber-physical
                 systems. A dynamic schedulability test is introduced to
                 determine whether tasks are schedulable within a finite
                 time window. The test is used to measure robustness of
                 a real-time scheduling algorithm by evaluating the
                 strength of computing time perturbations that break
                 schedulability at runtime. Robustness of battery
                 management is quantified analytically by an adaptive
                 threshold on the state of charge. The adaptive
                 threshold significantly reduces the false alarm rate
                 for battery management algorithms to decide when a
                 battery needs to be replaced.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Voros:2013:MHD,
  author =       "Nikolaos S. Voros and Michael H{\"u}bner and
                 J{\"u}rgen Becker and Matthias K{\"u}hnle and Florian
                 Thomaitiv and Arnaud Grasset and Paul Brelet and
                 Philippe Bonnot and Fabio Campi and Eberhard
                 Sch{\"u}ler and Henning Sahlbach and Sean Whitty and
                 Rolf Ernst and Enrico Billich and Claudia Tischendorf
                 and Ulrich Heinkel and Frank Ieromnimon and Dimitrios
                 Kritharidis and Axel Schneider and Joachim Knaeblein
                 and Wolfram Putzke-R{\"o}ming",
  title =        "{MORPHEUS}: a heterogeneous dynamically reconfigurable
                 platform for designing highly complex embedded
                 systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "70:1--70:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recently, system designers are facing the challenge of
                 developing systems that have diverse features, are more
                 complex and more powerful, with less power consumption
                 and reduced time to market. These contradictory
                 constraints have forced technology providers to pursue
                 design solutions that will allow design teams to meet
                 the above design targets. In that respect, this paper
                 introduces an innovative technology platform, called
                 MORPHEUS, which intends to provide a complete design
                 framework for dealing with the aforementioned
                 challenges. MORPHEUS consists of a state of the art
                 architecture that encompasses heterogeneous
                 reconfigurable accelerators for implementing on the
                 same hardware architecture applications with varying
                 characteristics and a tool chain that, through a
                 software oriented approach, eases the implementation of
                 highly complex applications with heterogeneous
                 characteristics. The proposed approach has been tested
                 and evaluated through state of the art case studies
                 borrowed from complementary application domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Crenne:2013:CMS,
  author =       "J{\'e}r{\'e}mie Crenne and Romain Vaslin and Guy
                 Gogniat and Jean-Philippe Diguet and Russell Tessier
                 and Deepak Unnikrishnan",
  title =        "Configurable memory security in embedded systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "71:1--71:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "System security is an increasingly important design
                 criterion for many embedded systems. These systems are
                 often portable and more easily attacked than
                 traditional desktop and server computing systems. Key
                 requirements for system security include defenses
                 against physical attacks and lightweight support in
                 terms of area and power consumption. Our new approach
                 to embedded system security focuses on the protection
                 of application loading and secure application
                 execution. During secure application loading, an
                 encrypted application is transferred from on-board
                 flash memory to external double data rate synchronous
                 dynamic random access memory (DDR-SDRAM) via a
                 microprocessor. Following application loading, the
                 core-based security technique provides both
                 confidentiality and authentication for data stored in a
                 microprocessor's system memory. The benefits of our low
                 overhead memory protection approaches are demonstrated
                 using four applications implemented in a
                 field-programmable gate array (FPGA) in an embedded
                 system prototyping platform. Each application requires
                 a collection of tasks with varying memory security
                 requirements. The configurable security core
                 implemented on-chip inside the FPGA with the
                 microprocessor allows for different memory security
                 policies for different application tasks. An average
                 memory saving of 63\% is achieved for the four
                 applications versus a uniform security approach. The
                 lightweight circuitry included to support application
                 loading from flash memory adds about 10\% FPGA area
                 overhead to the processor-based system and main memory
                 security hardware.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2013:AEE,
  author =       "Shaoshan Liu and Richard Neil Pittman and Alessandro
                 Forin and Jean-Luc Gaudiot",
  title =        "Achieving energy efficiency through runtime partial
                 reconfiguration on reconfigurable systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "72:1--72:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "One major advantage of reconfigurable computing
                 systems is their ability to reconfigure hardware at
                 runtime. In this paper, we study the feasibility of
                 achieving energy efficiency in reconfigurable computing
                 systems (e.g., FPGAs) through runtime partial
                 reconfiguration (PR) techniques. In the ideal scenario,
                 we use a hardware accelerator to accelerate certain
                 parts of the program execution; when the accelerator is
                 not active, we use partial reconfiguration to unload it
                 to reduce power consumption. Since the reconfiguration
                 process may introduce a high energy overhead, it is
                 unclear whether this approach is efficient. To approach
                 this problem, we first analytically identify the
                 conditions under which partial reconfiguration can
                 reduce energy consumption. Our results indicate that
                 the key to reduce partial reconfiguration energy
                 overhead is to minimize the time overhead of the
                 reconfiguration process. Based on this analysis, we
                 design and implement a fast reconfiguration engine that
                 achieves close-to-ideal throughput on Xilinx Virtex-4
                 FPGAs. Our fast reconfiguration engine utilizes a
                 master-slave DMA pair to stream data between the SRAM
                 and the Internal Configuration Access Port (ICAP). We
                 experimentally verify our proposed solutions and
                 compare our design to existing energy reduction
                 techniques, such as clock gating. The results of our
                 study show that by using partial reconfiguration to
                 eliminate the power consumption of the accelerator when
                 it is inactive, we can accelerate program execution and
                 at the same time reduce the overall energy consumption
                 by half.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dong:2013:PRS,
  author =       "Qi Dong and Donggang Liu and Peng Ning",
  title =        "Providing {DoS} resistance for signature-based
                 broadcast authentication in sensor networks",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "73:1--73:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent studies have demonstrated that it is feasible
                 to perform public key cryptographic operations on
                 resource-constrained sensor platforms. However, the
                 significant energy consumption introduced by public key
                 operations makes any public key-based protocol an easy
                 target of Denial-of-Service (DoS) attacks. For example,
                 if digital signature schemes such as ECDSA are used
                 directly for broadcast authentication without further
                 protection, an attacker can simply broadcast fake
                 messages and force the receiving nodes to perform a
                 huge number of unnecessary signature verifications,
                 eventually exhausting their battery power. This paper
                 shows how to mitigate such DoS attacks when digital
                 signatures are used for broadcast authentication in
                 sensor networks. Specifically, this paper first
                 presents two filtering techniques, the group-based
                 filter and the key chain-based filter, to handle the
                 DoS attacks against signature verification. Both
                 methods can significantly reduce the number of
                 unnecessary signature verifications when a sensor node
                 is under DoS attacks. This paper then combines these
                 two filters and proposes a hybrid solution to further
                 improve the performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "73",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Falk:2013:RBQ,
  author =       "Joachim Falk and Christian Zebelein and Christian
                 Haubelt and J{\"u}rgen Teich",
  title =        "A rule-based quasi-static scheduling approach for
                 static islands in dynamic dataflow graphs",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "74:1--74:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, an efficient rule-based clustering
                 algorithm for static dataflow subgraphs in a dynamic
                 dataflow graph is presented. The clustered static
                 dataflow actors are quasi-statically scheduled, in such
                 a way that the global performance in terms of latency
                 and throughput is improved compared to a dynamically
                 scheduled execution, while avoiding the introduction of
                 deadlocks as generated by naive static scheduling
                 approaches. The presented clustering algorithm
                 outperforms previously published approaches by a faster
                 computation and more compact representation of the
                 derived quasi-static schedule. This is achieved by a
                 rule-based approach, which avoids an explicit
                 enumeration of the state space. A formal proof of the
                 correctness of the presented clustering approach is
                 given. Experimental results show significant
                 improvements in both performance and code size,
                 compared to a state-of-the-art clustering algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "74",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ost:2013:PAD,
  author =       "Luciano Ost and Marcelo Mandelli and Gabriel Marchesan
                 Almeida and Leandro Moller and Leandro Soares Indrusiak
                 and Gilles Sassatelli and Pascal Benoit and Manfred
                 Glesner and Michel Robert and Fernando Moraes",
  title =        "Power-aware dynamic mapping heuristics for {NoC}-based
                 {MPSoCs} using a unified model-based approach",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "75:1--75:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The mapping of tasks to processing elements of an
                 MPSoC has critical impact on system performance and
                 energy consumption. To cope with complex dynamic
                 behavior of applications, it is common to perform task
                 mapping during runtime so that the utilization of
                 processors and interconnect can be taken into account
                 when deciding the allocation of each task. This paper
                 has two major contributions, one of them targeting the
                 general problem of evaluating dynamic mapping
                 heuristics in NoC-based MPSoCs, and another focusing on
                 the specific problem of finding a task mapping that
                 optimizes energy consumption in those architectures.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "75",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2013:JVP,
  author =       "Tiantian Liu and Chun Jason Xue and Minming Li",
  title =        "Joint variable partitioning and bank selection
                 instruction optimization for partitioned memory
                 architectures",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "76:1--76:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "About 55\% of all CPUs sold in the world are 8-bit
                 microcontrollers or microprocessors which can only
                 access limited memory space without extending address
                 buses. Partitioned memory with bank switching is a
                 technique to increase memory size without extending
                 address buses. Bank Selection Instructions (BSLs) need
                 to be inserted into the original programs to modify the
                 bank register to point to the desired banks. These BSLs
                 introduce both code size and execution time overheads.
                 In this paper, we partition variables into different
                 banks and insert BSLs at different positions of
                 programs so that the overheads can be minimized.
                 Minimizing speed (execution time) overhead and
                 minimizing space (code size) overhead are two
                 objectives investigated in this paper. A multi-copy
                 approach is also proposed to store multiple copies of
                 several variables on different banks when the memory
                 space allows. It takes the read/write properties of
                 variables into consideration and achieves more BSL
                 overhead reduction. Experiments show that the proposed
                 algorithms can reduce BSL overheads effectively
                 compared to state-of-the-art techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "76",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hu:2013:WAR,
  author =       "Jingtong Hu and Chun Jason Xue and Qingfeng Zhuge and
                 Wei-Che Tseng and Edwin H.-M. Sha",
  title =        "Write activity reduction on non-volatile main memories
                 for embedded chip multiprocessors",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "77:1--77:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent advances in circuit and semiconductor
                 technologies have pushed Non-Volatile Memory (NVM)
                 technologies into a new era. These technologies exhibit
                 appealing properties such as low power consumption,
                 non-volatility, shock-resistivity, and high density.
                 However, there are challenges to which we need answers
                 in the road of applying non-volatile memories as main
                 memory in embedded computer systems. First, when
                 compared with DRAM, NVMs have a limited number of
                 write/erase cycles. Second, write activities on NVM are
                 more expensive than DRAM memory in terms of energy
                 consumption and access latency. Both challenges will
                 benefit from the reduction of the write activities on
                 the NVMs. In this paper, we target embedded Chip
                 Multiprocessors (CMPs) with Scratch Pad Memory (SPM)
                 and non-volatile main memory. We introduce scheduling,
                 data migration, and recomputation techniques to reduce
                 the number of write activities on NVMs. Experimental
                 results show that the proposed methods can reduce the
                 number of writes by 58.46\% on average, which means
                 that the NVM can last 2.8 times as long as before. For
                 Phase Change Memory (PCM), the lifetime is extended
                 from 2.5 years to about 7 years on average and 15 years
                 at the most. Also, the finish time of the tested
                 programs is reduced by an average of 38.07\%, and the
                 energy consumption is reduced by an average of
                 51.23\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "77",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Baruah:2013:PST,
  author =       "Sanjoy Baruah",
  title =        "Partitioning sporadic task systems upon
                 memory-constrained multiprocessors",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "78:1--78:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most prior theoretical research on real-time
                 partitioning algorithms for multiprocessor platforms
                 has focused on ensuring that the cumulative computing
                 requirements of the tasks assigned to each processor
                 do not exceed the processor's processing power.
                 However, computing capacity is often not the only
                 limiting resource: on many multiprocessor platforms
                 each individual computing unit may have limited amounts
                 of multiple additional types of resources (such as
                 local memory) in addition to having limited processing
                 power. We present algorithms for partitioning a
                 collection of sporadic tasks, each characterized by a
                 WCET, a relative deadline, and a period, upon a
                 multiprocessor platform in a manner that is cognizant
                 of such additional constraints as well as the
                 processing capacity constraints.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Paolieri:2013:HRT,
  author =       "Marco Paolieri and J{\"o}rg Mische and Stefan Metzlaff
                 and Mike Gerdes and Eduardo Qui{\~n}ones and Sascha
                 Uhrig and Theo Ungerer and Francisco J. Cazorla",
  title =        "A hard real-time capable multi-core {SMT} processor",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "79:1--79:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Hard real-time applications in safety critical domains
                 require high performance and time analyzability.
                 Multi-core processors are an answer to these demands,
                 however task interferences make multi-cores more
                 difficult to analyze from a worst-case execution time
                 point of view than single-core processors. We propose a
                 multi-core SMT processor that ensures a bounded maximum
                 delay a task can suffer due to inter-task
                 interferences. Multiple hard real-time tasks can be
                 executed on different cores together with additional
                 non real-time tasks. Our evaluation shows that the
                 proposed MERASA multi-core provides predictability for
                 hard real-time tasks and also high performance for non
                 hard real-time tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "79",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yun:2013:DHS,
  author =       "Jeong-Han Yun and Chul-Joo Kim and Seonggun Kim and
                 Kwang-Moo Choe and Taisook Han",
  title =        "Detection of harmful schizophrenic statements in
                 {Esterel}",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "80:1--80:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In imperative synchronous languages, a statement is
                 called schizophrenic if it is executed more than once
                 in a single clock. When a schizophrenic statement is
                 translated into a circuit, the circuit can behave
                 abnormally because of the multiple executions. To solve
                 the problems caused by schizophrenic statements,
                 compilers duplicate the statements to avoid multiple
                 executions. Esterel is an imperative synchronous
                 language. Schizophrenic statements in Esterel are
                 considered to occur due to the instantaneous reentrance
                 of local signal declarations or parallel statements.
                 However, if the corresponding circuit of a
                 schizophrenic statement behaves normally, it is
                 harmless and thus curing is not necessary. In this
                 paper, we identify the conditions under which a
                 schizophrenic statement of the Esterel program must be
                 cured during circuit translation. We also propose an
                 algorithm to detect schizophrenic statements that have
                 to be cured on the control flow graphs (CFGs) of source
                 codes. Our algorithm detects all schizophrenic
                 statements that have to be cured and results in fewer
                 false alarms on the benchmark programs used in the
                 previous work. It is simple and based on the CFG of a
                 program, implying that it can be merged into existing
                 compilers easily.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "80",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Baek:2013:EEH,
  author =       "Seungjae Baek and Jongmoo Choi and Donghee Lee and Sam
                 H. Noh",
  title =        "Energy-efficient and high-performance software
                 architecture for storage class memory",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "81:1--81:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recently, interest in incorporating Storage Class
                 Memory (SCM), which blurs the distinction between
                 memory and storage, into mainstream computing has been
                 increasing rapidly. In this paper, we address the
                 emerging questions regarding the use of SCM. Based on
                 an embedded platform that employs FeRAM, a type of SCM,
                 we present our findings. In summary, by introducing
                 SCM, power efficiency improves while performance is
                 degraded. We also show that such performance
                 degradations may be removed with operating system level
                 schemes that fully exploit the characteristics of SCM.
                 Finally, we present permanent computing that supports
                 lightweight system on/off capabilities by using SCM.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "81",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2013:HPL,
  author =       "Dongwon Lee and Marilyn Wolf and Shuvra S.
                 Bhattacharyya",
  title =        "High-performance and low-energy buffer mapping method
                 for multiprocessor {DSP} systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "82:1--82:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "When implementing digital signal processing (DSP)
                 applications onto multiprocessor systems, one
                 significant problem in the viewpoints of performance is
                 the memory wall. In this paper, to help alleviate the
                 memory wall problem, we propose a novel,
                 high-performance buffer mapping policy for
                 SDF-represented DSP applications on bus-based
                 multiprocessor systems that support the shared-memory
                 programming model. The proposed policy exploits the
                 bank concurrency of the DRAM main memory system
                 according to the analysis of hierarchical parallelism.
                 Energy consumption is also a critical parameter,
                 especially in battery-based embedded computing systems.
                 In this paper, we apply a synchronization back-off
                 scheme on the top of the proposed high-performance
                 buffer mapping policy to reduce energy consumption. The
                 energy saving is attained by minimizing the number of
                 non-essential synchronization transactions. We measure
                 throughput and energy consumption on both synthetic and
                 real benchmarks. The simulation results show that the
                 proposed buffer mapping policy is very useful in terms
                 of performance, especially in memory-intensive
                 applications where the total execution time of
                 computational tasks is relatively small compared to
                 that of memory operations. In addition, the proposed
                 synchronization back-off scheme provides a reduction in
                 the number of synchronization transactions without
                 degrading performance, which results in system energy
                 saving.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "82",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tripakis:2013:CSD,
  author =       "Stavros Tripakis and Dai Bui and Marc Geilen and Bert
                 Rodiers and Edward A. Lee",
  title =        "Compositionality in synchronous data flow: Modular
                 code generation from hierarchical {SDF} graphs",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "83:1--83:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Hierarchical SDF models are not compositional: a
                 composite SDF actor cannot be represented as an atomic
                 SDF actor without loss of information that can lead to
                 rate inconsistency or deadlock. Motivated by the need
                 for incremental and modular code generation from
                 hierarchical SDF models, we introduce in this paper
                 DSSF profiles. DSSF (Deterministic SDF with Shared
                 FIFOs) forms a compositional abstraction of composite
                 actors that can be used for modular compilation. We
                 provide algorithms for automatic synthesis of
                 non-monolithic DSSF profiles of composite actors given
                 DSSF profiles of their sub-actors. We show how
                 different trade-offs can be explored when synthesizing
                 such profiles, in terms of compactness (keeping the
                 size of the generated DSSF profile small) versus
                 reusability (maintaining necessary information to
                 preserve rate consistency and deadlock-absence) as well
                 as algorithmic complexity. We show that our method
                 guarantees maximal reusability and report on a
                 prototype implementation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "83",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zimmerman:2013:MBR,
  author =       "Andrew T. Zimmerman and Jerome P. Lynch and Frank T.
                 Ferrese",
  title =        "Market-based resource allocation for distributed data
                 processing in wireless sensor networks",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "84:1--84:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In recent years, improved wireless technologies have
                 enabled the low-cost deployment of large numbers of
                 sensors for a wide range of monitoring applications.
                 Because of the computational resources (processing
                 capability, storage capacity, etc.) collocated with
                 each sensor in a wireless network, it is often possible
                 to perform advanced data analysis tasks autonomously
                 and in-network, eliminating the need for the
                 post-processing of sensor data. With new parallel
                 algorithms being developed for in-network computation,
                 it has become necessary to create a framework in which
                 all of a wireless network's scarce resources (CPU time,
                 wireless bandwidth, storage capacity, battery power,
                 etc.) can be best utilized in the midst of competing
                 computational requirements. In this study, a
                 market-based method is developed to autonomously
                 distribute these scarce network resources across
                 various computational tasks with competing objectives
                 and/or resource demands. This method is experimentally
                 validated on a network of wireless sensing prototypes,
                 where it is shown to be capable of Pareto-optimally
                 allocating scarce network resources. Then, it is
                 applied to the real-world problem of rupture detection
                 in shipboard chilled water systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "84",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mu:2013:POS,
  author =       "Jingqing Mu and Karthik Shankar and Roman Lysecky",
  title =        "Profiling and online system-level performance and
                 power estimation for dynamically adaptable embedded
                 systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "3",
  pages =        "85:1--85:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue May 28 17:38:27 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Significant research has demonstrated the performance
                 and power benefits of runtime dynamic reconfiguration
                 of FPGAs and microprocessor/FPGA devices. For
                 dynamically reconfigurable systems, in which the
                 selection of hardware coprocessors to implement within
                 the FPGA is determined at runtime, online estimation
                 methods are needed to evaluate the performance and
                 power consumption impact of the hardware coprocessor
                 selection. In this paper, we present a profile assisted
                 online system-level performance and power estimation
                 framework for estimating the speedup and power
                 consumption of dynamically reconfigurable embedded
                 systems. We evaluate the accuracy and fidelity of our
                 online estimation framework for dynamic hardware kernel
                 selection to maximize performance or minimize the
                 system power consumption.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "85",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jafari:2013:ISS,
  author =       "Roozbeh Jafari and John Lach and Majid Sarrafzadeh and
                 William Kaiser",
  title =        "Introduction to the special section on wireless health
                 systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "98:1--98:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485986",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "98",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wijsman:2013:TME,
  author =       "Jacqueline Wijsman and Bernard Grundlehner and Julien
                 Penders and Hermie Hermens",
  title =        "Trapezius muscle {EMG} as predictor of mental stress",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "99:1--99:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485987",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Stress is a growing problem in society and can cause
                 musculoskeletal complaints. It would be useful to
                 measure stress for prevention of stress-related health
                 problems. An experiment is described in which EMG
                 signals of the upper trapezius muscle were measured
                 with a wireless system during three different stressful
                 conditions: a calculation task (the Norinder test), a
                 logical puzzle task and a memory task. The latter two
                 tests were newly designed and aimed at creating
                 circumstances that are similar to work stress.
                 Amplitudes of the EMG signals were significantly higher
                 during stress compared to rest (+2.6\% of reference
                 contraction level) and relative time with EMG gaps was
                 lower during stress (-14.3\% of time). Also, mean and
                 median frequencies were significantly lower during
                 stress than during rest (-8.6 and -8.8 Hz,
                 respectively). EMG amplitude increased not only from
                 rest to stress conditions, but also during stressful
                 conditions and decreased during relaxation periods. EMG
                 features correlated with subjectively indicated stress
                 levels (correlations of 0.32 with RMS and -0.32 with
                 relative gaptime). The results indicate that EMG is a
                 useful parameter to detect stress. Together with other
                 physiological sensors, EMG sensors can be included in a
                 wireless system for ambulatory monitoring of stress
                 levels.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "99",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wouhaybi:2013:ECM,
  author =       "Rita H. Wouhaybi and Mark D. Yarvis and Sangita Sharma
                 and Philip Muse and Chieh-Yih Wan and Sai Prasad and
                 Lenitra Durham and Ritu Sahni and Robert Norton and
                 Merlin Curry and Holly Jimison and Richard Harper and
                 Robert A. Lowe",
  title =        "Experiences with context management in emergency
                 medicine",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "100:1--100:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485988",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In emergency medicine, patient care is intense and
                 stressful, often requiring paramedics to consult with
                 remote physicians to convey the patient's condition. We
                 present a framework for context-management in
                 telemedicine developed in collaboration between
                 engineers, physicians, and paramedics. We describe a
                 mobile platform and embedded wireless sensors to
                 capture physiological and audio context into a
                 comprehensive patient record, accessible locally and
                 remotely. We describe a first evaluation of this
                 technology by trained paramedics in simulated scenarios
                 and evaluate key aspects of system performance. Early
                 results suggest that wireless sensing can provide
                 reliable and low latency data both locally and to
                 remote physicians. In addition, audio context capture
                 is a promising approach to capturing a comprehensive
                 patient record, with a low rate of medically important
                 errors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "100",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Valtazanos:2013:LSS,
  author =       "Aris Valtazanos and D. K. Arvind and Subramanian
                 Ramamoorthy",
  title =        "Latent space segmentation for mobile gait analysis",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "101:1--101:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485989",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "An unsupervised learning algorithm is presented for
                 segmentation and evaluation of motion data from the
                 on-body Orient wireless motion capture system for
                 mobile gait analysis. The algorithm is model-free and
                 operates on the latent space of the motion, by first
                 aggregating all the sensor data into a single vector,
                 and then modeling them on a low-dimensional manifold to
                 perform segmentation. The proposed approach is
                 contrasted to a basic, model-based algorithm, which
                 operates directly on the joint angles computed by the
                 Orient sensor devices. The latent space algorithm is
                 shown to be capable of retrieving qualitative features
                 of the motion even in the face of noisy or incomplete
                 sensor readings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "101",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Masse:2013:MWE,
  author =       "Fabien Mass{\'e} and Martien {Van Bussel} and Aline
                 Serteyn and Johan Arends and Julien Penders",
  title =        "Miniaturized wireless {ECG} monitor for real-time
                 detection of epileptic seizures",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "102:1--102:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485990",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent advances in miniaturization of ultra-low power
                 components allow for more intelligent wearable health
                 monitors. The development and evaluation of a wireless
                 wearable electrocardiogram (ECG) monitor to detect
                 epileptic seizures from changes in the cardiac rhythm
                 is described. The ECG data are analyzed by embedded
                 algorithms: a robust beat-detection algorithm combined
                 with a real-time epileptic seizure detector. In its
                 current implementation, the proposed prototype is 52$
                 \times $ 36$ \times $ 15mm$^3$, and has an autonomy of
                 one day. Based on data collected on the first three
                 epilepsy patients, preliminary clinical results are
                 provided. Wireless, miniaturized and comfortable, this
                 prototype opens new perspectives for health
                 monitoring.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "102",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chi:2013:WNE,
  author =       "Yu M. Chi and Patrick Ng and Gert Cauwenberghs",
  title =        "Wireless noncontact {ECG} and {EEG} biopotential
                 sensors",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "103:1--103:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485991",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Wearable, unobtrusive and patient friendly
                 physiological sensors will be a key driving force in
                 the wireless health revolution. Cardiac (ECG) and brain
                 (EEG) signals are two important signal modalities
                 indicative of healthy and diseased states of body and
                 mind that directly benefit from long-term monitoring.
                 Despite advancements in wireless and embedded
                 electronics technology, however, ECG/EEG monitoring
                 devices still face problems with patient compliance and
                 comfort from the use of wet/gel electrodes. We have
                 developed two wireless biopotential instrumentation
                 systems using noncontact electrodes that can operate
                 without direct skin contact and through thin layers of
                 fabric. The first system is a general purpose
                 replacement for traditional ECG/EEG telemetry systems
                 and the second is a compact, fully self-contained
                 wireless ECG tag. All of the issues relating to the
                 design of low noise, high performance noncontact
                 sensors are discussed along with full technical
                 details, circuit schematics and construction
                 techniques. The noncontact electrode has been
                 integrated into both a wearable ECG chest harness as
                 well as an EEG headband and characterized in a battery of
                 experiments that represent potential health
                 applications including resting ECG, exercise ECG and
                 EEG directly against standard clinical adhesive
                 Ag\slash AgCl electrodes. With careful design and
                 secure mechanical harnesses the noncontact sensor is
                 capable of approaching the quality of conventional
                 electrodes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "103",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cardo:2013:ISS,
  author =       "Jos{\'e} Flich Cardo and Maurizio Palesi",
  title =        "Introduction to the special section on on-chip and
                 off-chip network architectures",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "104:1--104:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485992",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "104",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yu:2013:ANC,
  author =       "Qiaoyan Yu and Meilin Zhang and Paul Ampadu",
  title =        "Addressing network-on-chip router transient errors
                 with inherent information redundancy",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "105:1--105:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485993",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We exploit the inherent information redundancy in the
                 control path of Network-on-Chip (NoC) routers to manage
                 transient errors, preventing packet loss and
                 misrouting. Outputs of the routing arbitration units in
                 NoC routers can be used to determine arbitration
                 failures, because the valid arbitration outputs are a
                 subset of all possible values. This feature is
                 exploited to detect and correct logic and register
                 errors in the router arbitration control path. The
                 proposed method is complementary to other error
                 management methods for NoC routers. An analytical
                 reliability model of our method is provided, including
                 parameters such as logic unit size, different error
                 rates for logic gates and registers, and the location
                 of faulty elements. Compared to triple-modular
                 redundancy (TMR), the proposed method improves the
                 arbiter reliability by two orders of magnitude while
                 reducing the total area and power by 43\% and 64\%,
                 respectively. In the presented case studies, two
                 traffic traces from the PARSEC benchmark suite are used
                 to evaluate the average latency and energy consumption.
                 Simulations performed on a 4$ \times $ 4 NoC show that
                 our method reduces the average latency by up to 50\%
                 and reduces average energy by up to 70\% compared to
                 other methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "105",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ghiribaldi:2013:CST,
  author =       "Alberto Ghiribaldi and Daniele Ludovici and Francisco
                 Trivi{\~n}o and Alessandro Strano and Jos{\'e} Flich
                 and Jos{\'e} Luis S{\'a}nchez and Francisco Alfaro and
                 Michele Favalli and Davide Bertozzi",
  title =        "A complete self-testing and self-configuring {NoC}
                 infrastructure for cost-effective {MPSoCs}",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "106:1--106:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485994",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  note =         "See comment \cite{Bishnoi:2015:BCC}.",
  abstract =     "Networks-on-chip need to survive to manufacturing
                 faults in order to sustain yield. An effective testing
                 and configuration strategy however implies two opposite
                 requirements. On one hand, a fast and scalable
                 built-in self-testing and self-diagnosis procedure has
                 to be carried out concurrently at NoC switches. On the
                 other hand, programming the NoC routing mechanism to go
                 around faulty links and switches can be optimally
                 performed by a centralized controller with global
                 network visibility. To the best of our knowledge, this
                 article proposes for the first time a global network
                 testing and configuration strategy that meets the
                 opposite requirements by means of a fault-tolerant dual
                 network architecture and a fast configuration algorithm
                 for the most common failure patterns. Experimental
                 results report an area overhead as low as 12.5\% with
                 respect to the baseline switch architecture while
                 achieving a high degree of fault tolerance. In fact,
                 even when multiple stuck-at faults are considered, the
                 capability of fault masking by the dual network is
                 always over 80\%, and the support for multiple link
                 failures is more than 90\% in presence of two unusable
                 links in the main network with minimum set-up times.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "106",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sem-Jacobsen:2013:ELC,
  author =       "Frank Olaf Sem-Jacobsen and Samuel Rodrigo and Tor
                 Skeie and Alessandro Strano and Davide Bertozzi",
  title =        "An efficient, low-cost routing framework for convex
                 mesh partitions to support virtualization",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "107:1--107:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485995",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "At the core of an efficient chip multiprocessor (CMP)
                 is support for unicast and multicast routing, low
                 implementation costs, and the ability to isolate
                 concurrent applications with maximum utilization of the
                 CMP. We present an efficient logic-based unicast and
                 multicast routing algorithm that guarantees isolation
                 of local application traffic within any near-convex
                 region on the chip, and the algorithms to recognize
                 supported partitions and configure the cores
                 accordingly. Evaluations show that the routing
                 algorithm has a 57\% more compact
                 implementation than a recent multicast solution with
                 the same coverage, and it achieves 5\% higher
                 throughput with 13\% lower latency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "107",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seiculescu:2013:DBE,
  author =       "Ciprian Seiculescu and Dara Rahmati and Srinivasan
                 Murali and Hamid Sarbazi-Azad and Luca Benini and
                 Giovanni {De Micheli}",
  title =        "Designing best effort networks-on-chip to meet hard
                 latency constraints",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "108:1--108:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485996",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many classes of applications require Quality of
                 Service (QoS) guarantees from the system interconnect.
                 In Networks-on-Chip (NoC) QoS guarantees usually
                 translate into bandwidth and latency constraints for
                 the traffic flows and require hardware support in the
                 NoC fabric and its interfaces. In this article we
                 present a novel NoC synthesis framework to
                 automatically build networks that meet hard latency
                 constraints of end-to-end traffic streams without
                 requiring specialized hardware for the network
                 components. The hard latency constraints are met by
                 carefully designing the NoC topology and selecting the
                 appropriate routes for flow using lean best-effort
                 network components. We perform experiments on several
                 System on Chip (SoC) benchmarks. We compared against a
                 topology synthesis method with no support for real-time
                 constraints and we show that the proposed method can
                 produce topologies that can meet significantly tighter
                 worst case latency constraints (on average 44\%). We
                 also show that the tightest worst case latency can be
                 provided with little overhead on power consumption (on
                 average 8.5\%).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "108",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zahavi:2013:GNL,
  author =       "Eitan Zahavi and Israel Cidon and Avinoam Kolodny",
  title =        "{Gana}: a novel low-cost conflict-free {NoC}
                 architecture",
  journal =      j-TECS,
  volume =       "12",
  number =       "4",
  pages =        "109:1--109:??",
  month =        jun,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2485984.2485997",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 1 18:28:35 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Similar to off-chip networks, current NoC
                 architectures are based on the store and forward of
                 uncoordinated end-to-end packet transmissions through
                 autonomous buffered routers. However, the monolithic
                 nature and the small physical dimensions of on chip
                 networks open up the opportunity for much more tightly
                 controlled architectures. We present GANA, a new Global
                 Arbiter NoC Architecture. In GANA, the transmission of
                 end-to-end data is timed by a global arbiter in a way
                 that avoids any queuing in the network. The arbitration
                 takes into account the complete transfer of the
                 end-to-end packets through the entire network path,
                 avoiding any intermediate queuing and hop-by-hop packet
                 arbitration. Consequently, buffers and arbiters are no
                 longer required in the routers, resulting in smaller
                 area and low power consumption. It is demonstrated
                 through detailed design and synthesis that the
                 additional area of the central arbiter and the control
                 path are negligible in comparison to the provided area
                 saving. For example, an 8$ \times $ 8 GANA consumes
                 only 16\% of the area of an equivalent autonomous NoC
                 while providing a better end-to-end throughput. The
                 end-to-end performance of GANA at high network loads is
                 typically much better than in a distributed-control
                 NOC, because resource contention and queuing in the
                 network are avoided. This comes at the cost of a few
                 percentage increase in latency at light loads due to
                 the additional arbitration phase. GANA architecture
                 combines the inherent benefits of a network
                 (parallelism and spatial reuse of links) with the
                 inherent benefits of high integration (global view of
                 the system state, central control, and
                 synchronization). The scalability of GANA is evaluated
                 analytically, showing that it can be superior to
                 fully-distributed networks in systems up to a size of
                 about 100 modules manufactured in 45nm technology,
                 which can be used today as well as in the foreseeable
                 future.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "109",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% TECS 12(4), June 2013, article 110.
@article{Kim:2013:NCA,
  author          = {Dongki Kim and Sungjoo Yoo and Sunggu Lee},
  title           = {A network congestion-aware memory subsystem for
      manycore},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = jun,
  volume          = {12},
  number          = {4},
  articleno       = {110},
  pages           = {110:1--110:??},
  doi             = {https://doi.org/10.1145/2485984.2485998},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {The network-on-chip (NoC) plays a crucial role in
      memory performance due to the fact that it can handle
      the majority of traffics from/to the DRAM memory
      controllers. However, there has been little work on the
      interplay between the NoC and memory controllers. In
      this article, we address a problem called network
      congestion-induced memory blocking and propose a novel
      memory controller, which performs memory access
      scheduling and network entry control in a network
      congestion-aware manner. In case of network congestion,
      in order to avoid performance degradation due to the
      blocking caused by data bound for congested regions in
      the NoC, the proposed memory controller favors requests
      and data associated with uncongested regions. In
      addition, in order to avoid the fairness problem of
      such a policy, we also propose a gradual method, which
      enables a trade-off between performance (in memory
      utilization) and fairness (in memory access latency).
      Experimental results show that the proposed method can
      offer up to 1.76 to 2.99 times improvement in memory
      utilization in the latency-tolerant designs.},
}

%%% TECS 12(4), June 2013, article 111.
@article{Sem-Jacobsen:2013:EPE,
  author          = {Frank Olaf Sem-Jacobsen and Samuel Rodrigo and
      Alessandro Strano and Tor Skeie and Davide Bertozzi and
      Francisco Gilabert},
  title           = {Enabling power efficiency through dynamic rerouting
      on-chip},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = jun,
  volume          = {12},
  number          = {4},
  articleno       = {111},
  pages           = {111:1--111:??},
  doi             = {https://doi.org/10.1145/2485984.2485999},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Networks-on-chip (NoCs) are key components in
      many-core chip designs. Dynamic power-awareness is a
      new challenge present in NoCs that must be efficiently
      handled by the routing functionality as it introduces
      irregularities in the commonly used 2-D meshes. In this
      article, we propose a logic-based routing algorithm,
      iFDOR, oriented towards dynamic powering down one
      region within every application partition on the chip
      through dynamic rerouting, with low implementation
      costs. Results show that we can successfully shutdown
      an arbitrary rectangular region within an application
      partition without significant impact on network
      performance.},
}

%%% TECS 12(4), June 2013, article 112 (abstracts of online supplements).
@article{Anonymous:2013:AOS,
  author          = {Anonymous},
  title           = {Abstracts: Online Supplements Volume 12, Number 1s,
      Volume 12, Number 2s},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = jun,
  volume          = {12},
  number          = {4},
  articleno       = {112},
  pages           = {112:1--112:??},
  doi             = {https://doi.org/10.1145/2485984.2499550},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Mon Jul 1 18:28:35 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% TECS 13(1), August 2013, article 1.
@article{Li:2013:SDM,
  author          = {Mo Li and Zheng Yang and Yunhao Liu},
  title           = {Sea depth measurement with restricted floating
      sensors},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = aug,
  volume          = {13},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:??},
  doi             = {https://doi.org/10.1145/2512448},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Sea depth monitoring is a critical task for ensuring
      safe operation of harbors. Traditional schemes largely
      rely on labor-intensive work and expensive hardware.
      This study explores the possibility of deploying
      networked sensors on the surface of the sea, measuring
      and reporting the sea depth of given areas. We propose
      a Restricted Floating Sensors (RFS) model in which
      sensor nodes are anchored to the sea bottom, floating
      within a restricted area. Distinguished from
      traditional stationary or mobile sensor networks, the
      RFS network consists of sensor nodes with restricted
      mobility. We construct the network model and elaborate
      the corresponding localization problem. We show that by
      locating such RFS sensors, the sea depth can be
      estimated without the help of any extra ranging
      devices. A prototype system with 25 Telos sensor nodes
      is deployed to validate this design. We also examine
      the efficiency and scalability of this design through
      large-scale simulations.},
}

%%% TECS 13(1), August 2013, article 2.
@article{Anand:2013:CCS,
  author          = {Madhukar Anand and Sebastian Fischmeister and Insup
      Lee},
  title           = {A comparison of compositional schedulability analysis
      techniques for hierarchical real-time systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = aug,
  volume          = {13},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:??},
  doi             = {https://doi.org/10.1145/2501626.2501629},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Schedulability analysis of hierarchical real-time
      embedded systems involves defining interfaces that
      represent the underlying system faithfully and then
      compositionally analyzing those interfaces. Whereas
      commonly used abstractions, such as periodic and
      sporadic tasks and their interfaces, are simple and
      well studied, results for more complex and expressive
      abstractions and interfaces based on task graphs and
      automata are limited. One contributory factor may be
      the hardness of compositional schedulability analysis
      with task graphs and automata. Recently, conditional
      task models, such as the recurring branching task
      model, have been introduced with the goal of reaching a
      middle ground in the trade-off between expressivity and
      ease of analysis. Consequently, techniques for
      compositional analysis with conditional models have
      also been proposed, and each offer different
      advantages. In this work, we revisit those techniques,
      compare their advantages using an automotive case
      study, and identify limitations that would need to be
      addressed before adopting these techniques for use with
      real-world problems.},
}

%%% TECS 13(1), August 2013, article 3.
@article{MartinezSantos:2013:LSA,
  author          = {Juan Carlos {Martinez Santos} and Yunsi Fei},
  title           = {Leveraging speculative architectures for runtime
      program validation},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = aug,
  volume          = {13},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:??},
  doi             = {https://doi.org/10.1145/2512456},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Program execution can be tampered with by malicious
      attackers through exploiting software vulnerabilities.
      Changing the program behavior by compromising control
      data and decision data has become the most serious
      threat in computer system security. Although several
      hardware approaches have been presented to validate
      program execution, they either incur great hardware
      overhead or introduce false alarms. We propose a new
      hardware-based approach by leveraging the existing
      speculative architectures for runtime program
      validation. The on-chip branch target buffer (BTB) is
      utilized as a cache of the legitimate control flow
      transfers stored in a secure memory region. In
      addition, the BTB is extended to store the correct
      program path information. At each indirect branch site,
      the BTB is used to validate the decision history of
      previous conditional branches and monitor the following
      execution path at runtime. Implementation of this
      approach is transparent to the upper operating system
      and programs. Thus, it is applicable to legacy code.
      Because of good code locality of the executable
      programs and effectiveness of branch prediction, the
      frequency of control-flow validations against the
      secure off-chip memory is low. Our experimental results
      show a negligible performance penalty and small storage
      overhead.},
}

%%% TECS 13(1), August 2013, article 4.
@article{Hsieh:2013:TAM,
  author          = {Ang-Chih Hsieh and Tingting Hwang},
  title           = {Thermal-aware memory mapping in {$3$D} designs},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = aug,
  volume          = {13},
  number          = {1},
  articleno       = {4},
  pages           = {4:1--4:??},
  doi             = {https://doi.org/10.1145/2512457},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {DRAM is usually used as main memory for program
      execution. The thermal behavior of a memory block in a
      3D SIP is affected not only by the power behavior but
      also the heat dissipating ability of that block. The
      power behavior of a block is related to the
      applications run on the system, while the heat
      dissipating ability is determined by the number of tier
      and the position the block locates. Therefore, a
      thermal-aware memory allocator should consider the
      following two points. First, the allocator should
      consider not only the power behavior of a logic block
      but also the physical location during memory mapping
      and second, the changing temperature of a physical
      block during execution of programs. In this article, we
      will propose a memory mapping algorithm taking into
      consideration these two points. Our technique can be
      classified as static thermal management to be applied
      to embedded software designs. Experiments show that for
      single-core systems, our method can reduce the
      temperature of memory system by 17.1${}^\circ $C, as
      compared to a straightforward mapping in the best case,
      and 13.3${}^\circ $C on average. For systems with four
      cores, the temperature reductions are 9.9${}^\circ $C
      and 11.6${}^\circ $C on average when L1 cache of each
      core is set to 4KB and 8KB, respectively.},
}

%%% TECS 13(1), August 2013, article 5.
@article{Bai:2013:SOS,
  author          = {Ke Bai and Aviral Shrivastava},
  title           = {A software-only scheme for managing heap data on
      limited local memory ({LLM}) multicore processors},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = aug,
  volume          = {13},
  number          = {1},
  articleno       = {5},
  pages           = {5:1--5:??},
  doi             = {https://doi.org/10.1145/2501626.2501632},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {This article presents a scheme for managing heap data
      in the local memory present in each core of a limited
      local memory (LLM) multicore architecture. Although
      managing heap data semi-automatically with software
      cache is feasible, it may require modifications of
      other thread codes. Crossthread modifications are very
      difficult to code and debug, and will become more
      complex and challenging as we increase the number of
      cores. In this article, we propose an intuitive
      programming interface, which is an automatic and
      scalable scheme for heap data management. Besides, for
      embedded applications, where the maximum heap size can
      be profiled, we propose several optimizations on our
      heap management to significantly decrease the library
      overheads. Our experiments on several benchmarks from
      MiBench executing on the Sony Playstation 3 show that
      our scheme is natural to use, and if we know the
      maximum size of heap data, our optimizations can
      improve application performance by an average of
      14\%.},
}

%%% TECS 13(1), August 2013, article 6.
@article{Gu:2013:DDL,
  author          = {Ji Gu and Hui Guo and Tohru Ishihara},
  title           = {{DLIC}: Decoded loop instructions caching for
      energy-aware embedded processors},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = aug,
  volume          = {13},
  number          = {1},
  articleno       = {6},
  pages           = {6:1--6:??},
  doi             = {https://doi.org/10.1145/2512464},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {With the explosive proliferation of embedded systems,
      especially through countless portable devices and
      wireless equipment used, embedded systems have become
      indispensable to the modern society and people's life.
      Those devices are often battery driven. Therefore, low
      energy consumption in embedded processors is important
      and becomes critical in step with the system
      complexity. The on-chip instruction cache (I-cache) is
      usually the most energy-consuming component on the
      processor chip due to its large size and frequent
      access operations. To reduce such energy consumption,
      the existing loop cache approaches use a tiny decoded
      cache to filter the I-cache access and instruction
      decode activity for repeated loop iterations. However,
      such designs are effective for small and simple loops,
      and only suitable for DSP kernel-like applications.
      They are not effectual for many embedded applications
      where complex loops are common. In this article, we
      propose a decoded loop instruction cache (DLIC) that is
      small, hence energy efficient, yet can capture most
      loops, including large nested ones with branch
      executions, so that a significant amount of I-cache
      accesses and instruction decoding can be eradicated.
      The experiments on a set of embedded benchmarks show
      that our proposed DLIC scheme can reduce energy
      consumption by up to 87\% as compared to normal
      cache-only design. On average, 66\% energy can be saved
      on instruction fetching and decoding, while at a
      performance overhead of only 1.4\%.},
}

%%% TECS 13(1), August 2013, article 7.
@article{Stanley-Marbell:2013:LPP,
  author          = {Phillip Stanley-Marbell},
  title           = {{L24}: Parallelism, performance, energy efficiency,
      and cost trade-offs in future sensor platforms},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = aug,
  volume          = {13},
  number          = {1},
  articleno       = {7},
  pages           = {7:1--7:??},
  doi             = {https://doi.org/10.1145/2512465},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Networks of sensors must process large amounts of
      intermittently-available data in situ. This motivates
      the investigation of means for achieving high
      performance when required, but ultra-low-power
      dissipation when idle. One approach to this challenge
      is the use of embedded multiprocessor systems, leading
      to trade-offs between parallelism, performance, energy
      efficiency, and cost. To evaluate these trade-offs and
      to gain insight for future system designs, this article
      presents the design, implementation, and evaluation of
      a miniature, energy-scalable, 24-processor module, L24,
      for use in embedded sensor systems. Analytic results
      and empirical evidence motivating such embedded
      multiprocessors is provided, and a parallel fixed-point
      fast Fourier transform implementation is presented.
      This application is used as a challenging but realistic
      evaluator of the presented hardware platform. Through a
      combination of hardware measurements, instruction-level
      microarchitectural simulation, and analytic modeling,
      it is demonstrated that the platform provides idle
      power dissipation over an order of magnitude lower than
      systems employing a monolithic processor of equivalent
      performance, while dynamic power dissipation remains
      competitive. Taking into account both application
      computation and interprocessor communication demands,
      it is shown that there may exist an optimum operating
      voltage that minimizes either time-to-solution, energy
      usage, or the energy-delay product. This optimum
      operating point is formulated analytically, calibrated
      with system measurements, and evaluated for the
      hardware platform and application presented.},
}

%%% TECS 13(1), August 2013, article 8.
@article{So:2013:STI,
  author          = {Won So and Alexander G. Dean},
  title           = {Software thread integration for instruction-level
      parallelism},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = aug,
  volume          = {13},
  number          = {1},
  articleno       = {8},
  pages           = {8:1--8:??},
  doi             = {https://doi.org/10.1145/2512466},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Multimedia applications require a significantly higher
      level of performance than previous workloads of
      embedded systems. They have driven digital signal
      processor (DSP) makers to adopt high-performance
      architectures like VLIW (Very-Long Instruction Word).
      Despite many efforts to exploit instruction-level
      parallelism (ILP) in the application, the speed is a
      fraction of what it could be, limited by the difficulty
      of finding enough independent instructions to keep all
      of the processor's functional units busy. This article
      proposes Software Thread Integration (STI) for
      instruction-level parallelism. STI is a software
      technique for interleaving multiple threads of control
      into a single implicitly multithreaded one. We use STI
      to improve the performance on ILP processors by merging
      parallel procedures into one, increasing the compiler's
      scope and hence allowing it to create a more efficient
      instruction schedule. Assuming the parallel procedures
      are given, we define a methodology for finding the best
      performing integrated procedure with a minimum
      compilation time. We quantitatively estimate the
      performance impact of integration, allowing various
      integration scenarios to be compared and ranked via
      profitability analysis. During integration of threads,
      different ILP-improving code transformations are
      selectively applied according to the control structure
      and the ILP characteristics of the code, driven by
      interactions with software pipelining. The estimated
      profitability is verified and corrected by an iterative
      compilation approach, compensating for possible
      estimation inaccuracy. Our modeling methods combined
      with limited compilation quickly find the best
      integration scenario without requiring exhaustive
      integration.},
}

%%% TECS 13(1), August 2013, article 9.
@Article{Ghasemzadeh:2013:ULP,
  author =       "Hassan Ghasemzadeh and Roozbeh Jafari",
  title =        "Ultra low-power signal processing in wearable
                 monitoring systems: A tiered screening architecture
                 with optimal bit resolution",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2501626.2501636",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Advances in technology have led to the development of
                 wearable sensing, computing, and communication devices
                 that can be woven into the physical environment of our
                 daily lives, enabling a large variety of new
                 applications in several domains, including wellness and
                 health care. Despite their tremendous potential to
                 impact our lives, wearable health monitoring systems
                 face a number of hurdles to become a reality. The
                 enabling processors and architectures demand a large
                 amount of energy, requiring sizable batteries. In this
                 article, we propose a granular decision-making
                 architecture for physical movement monitoring
                 applications. The module can be viewed as a tiered
                 wake-up circuitry. This decision-making module, in
                 combination with a low-power microcontroller, allows
                 for significant power saving through an ultra low-power
                 processing architecture. The significant power saving
                 is achieved by performing a preliminary ultra low-power
                 signal processing, and hence, keeping the
                 microcontroller off when the incoming signal is not of
                 interest. The preliminary signal processing is
                 performed by a set of special-purpose functional units,
                 also called screening blocks, that implement template
                 matching functions. We formulate and solve an
                 optimization problem for selecting screening blocks
                 such that the accuracy requirements of the signal
                 processing are accommodated while the total power is
                 minimized. Our experimental results on real data from
                 wearable motion sensors show that the proposed
                 algorithm achieves 63.2\% energy saving while
                 maintaining a sensitivity of 94.3\% in recognizing
                 transitional actions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% TECS 13(1), August 2013, article 10.
@article{Chang:2013:RED,
  author          = {Yuan-Hao Chang and Ming-Chang Yang and Tei-Wei Kuo and
      Ren-Hung Hwang},
  title           = {A reliability enhancement design under the flash
      translation layer for {MLC}-based flash-memory storage
      systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  year            = {2013},
  month           = aug,
  volume          = {13},
  number          = {1},
  articleno       = {10},
  pages           = {10:1--10:??},
  doi             = {https://doi.org/10.1145/2512467},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource       = {http://portal.acm.org/;
      https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Although flash memory has gained very strong momentum
      in the storage market, the reliability of flash-memory
      chips has been dropped significantly in the past years.
      This article presents a reliability enhancement design
      under the flash management layer (i.e., flash
      translation layer) to address this concern so as to
      reduce the design complexity of flash-memory management
      software/firmware and to improve the maintainability
      and portability of existing and future products. In
      particular, a log-based write strategy with a
      hash-based caching policy is proposed to provide extra
      ECC redundancy and performance improvement. Strategies
      for bad block management are also presented. The
      failure rate of flash-memory storage systems is
      analyzed with the considerations of bit errors. The
      proposed design is later evaluated by a series of
      experiments based on realistic traces. It was shown
      that the proposed approach could significantly improve
      the reliability of flash memory with very limited
      system overheads.},
}

@Article{Chao:2013:TLA,
  author =       "Chih-Hao Chao and Kun-Chih Chen and Tsu-Chu Yin and
                 Shu-Yen Lin and An-Yeu (Andy) Wu",
  title =        "Transport-layer-assisted routing for runtime thermal
                 management of {$3$D} {NoC} systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "11:1--11:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512468",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "To ensure thermal safety and to avoid performance
                 degradation from temperature regulation in 3D NoC, we
                 propose a new temperature-traffic control framework.
                 The framework contains the vertical throttling-based
                 runtime thermal management (VT-RTM) scheme and the
                 transport-layer assisted routing (TLAR) scheme. VT-RTM
                 scheme increases the cooling speed and maintains high
                 availability. TLAR scheme sustains the throughput of
                 the nonstationary irregular mesh network. In our
                 experiments, VT-RTM scheme reduces cooling time by 84\%
                 and achieves 98\% network availability; the overall
                 performance impact is around 8\% of traditional
                 schemes. TLAR scheme reduces average latency by 35\%
                 and improves sustainable throughput by 76\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kent:2013:CPS,
  author = {Christopher G. Kent and Joann M. Paul},
  title = {Contextual partitioning for speech recognition},
  journal = j-TECS,
  volume = {13},
  number = {1},
  pages = {12:1--12:??},
  month = aug,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2501626.2501639},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Sep 5 19:03:11 MDT 2013},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Many multicore computers are single-user devices,
    creating the potential to partition by situational
    usage contexts, similar to how the human brain is
    organized. Contextual partitioning (CP) permits
    multiple simplified versions of the same task to exist
    in parallel, with selection tied to the context in use.
    We introduce CP for speech recognition, specifically
    targeted at user interfaces in handheld embedded
    devices. Contexts are drawn from webpage interactions.
    CP results in 61\% fewer decoding errors, 97\% less
    training for vocabulary changes, near-linear scaling
    potential with increasing core counts, and up to a
    potential 90\% reduction in power usage.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {12},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Kim:2013:DER,
  author =       "Sunwoo Kim and Won Seob Jeong and Won W. Ro and
                 Jean-Luc Gaudiot",
  title =        "Design and evaluation of random linear network coding
                 accelerators on {FPGAs}",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "13:1--13:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512469",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Network coding is a well-known technique used to
                 enhance network throughput and reliability by applying
                 special coding to data packets. One critical problem in
                 practice, when using the random linear network coding
                 technique, is the high computational overhead. More
                 specifically, using this technique in embedded systems
                 with low computational power might cause serious delays
                 due to the complex Galois field operations and matrix
                 handling. To this end, this article proposes a
                 high-performance decoding logic for random linear
                 network coding using field-programmable gate-array
                 (FPGA) technology. We expect that the inherent
                 reconfigurability of FPGAs will provide sufficient
                 performance as well as programmability to cope with
                 changes in the specification of the coding. The main
                 design motivation was to improve the decoding delay by
                 dividing and parallelizing the entire decoding process.
                 Fast arithmetic operations are achieved by the proposed
                 parallelized GF ALUs, which allow calculations with all
                 the elements of a single row of a matrix to be
                 performed concurrently. To improve the flexibility in
                 the utilization of the FPGA components, two different
                 decoding methods have been designed and compared. The
                 performance of the proposed idea is evaluated by
                 comparing with the performance of the decoding process
                 executed by general-purpose processors through an
                 equivalent software algorithm. Overall, a maximum
                 throughput of 65.98 Mbps is achieved with the proposed
                 FPGA design on an XC5VLX110T Virtex 5 device. In
                 addition, the proposed design provides speedups of up
                 to 13.84 compared to an aggressively parallelized
                 software decoding algorithm run on a quad-core AMD
                 processor. Moreover, the design affords 12 times higher
                 power efficiency in terms of throughput per watt than
                 an ARM Cortex-A9 processor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Beg:2013:CPA,
  author =       "Mirza Beg and Peter van Beek",
  title =        "A constraint programming approach for integrated
                 spatial and temporal scheduling for clustered
                 architectures",
  journal =      j-TECS,
  volume =       "13",
  number =       "1",
  pages =        "14:1--14:??",
  month =        aug,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512470",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 5 19:03:11 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many embedded processors use clustering to scale up
                 instruction-level parallelism in a cost-effective
                 manner. In a clustered architecture, the registers and
                 functional units are partitioned into smaller units and
                 clusters communicate through register-to-register copy
                 operations. Texas Instruments, for example, has a
                 series of architectures for embedded processors which
                 are clustered. Such an architecture places a heavier
                 burden on the compiler, which must now assign
                 instructions to clusters (spatial scheduling), assign
                 instructions to cycles (temporal scheduling), and
                 schedule copy operations to move data between clusters.
                 We consider instruction scheduling of local blocks of
                 code on clustered architectures to improve performance.
                 Scheduling for space and time is known to be a hard
                 problem. Previous work has proposed greedy approaches
                 based on list scheduling to simultaneously perform
                 spatial and temporal scheduling and phased approaches
                 based on first partitioning a block of code to do
                 spatial assignment and then performing temporal
                 scheduling. Greedy approaches risk making mistakes that
                 are then costly to recover from, and partitioning
                 approaches suffer from the well-known phase ordering
                 problem. In this article, we present a constraint
                 programming approach for scheduling instructions on
                 clustered architectures. We employ a problem
                 decomposition technique that solves spatial and
                 temporal scheduling in an integrated manner. We analyze
                 the effect of different hardware parameters --- such as
                 the number of clusters, issue-width, and intercluster
                 communication cost --- on application performance. We
                 found that our approach was able to achieve an
                 improvement of up to 26\%, on average, over a
                 state-of-the-art technique on superblocks from SPEC
                 2000 benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Brisk:2013:ISI,
  author = {Philip Brisk and Tulika Mitra},
  title = {Introduction to the special issue on
    application-specific processors},
  journal = j-TECS,
  volume = {13},
  number = {2},
  pages = {15:1--15:??},
  month = sep,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2514641.2514642},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Sep 27 18:13:13 MDT 2013},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {15},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Vyas:2013:HAS,
  author =       "Sudhanshu Vyas and Adwait Gupte and Christopher D.
                 Gill and Ron K. Cytron and Joseph Zambreno and Phillip
                 H. Jones",
  title =        "Hardware architectural support for control systems and
                 sensor processing",
  journal =      j-TECS,
  volume =       "13",
  number =       "2",
  pages =        "16:1--16:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2514641.2514643",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Sep 27 18:13:13 MDT 2013",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The field of modern control theory and the systems
                 used to implement these controls have shown rapid
                 development over the last 50 years. It was often the
                 case that those developing control algorithms could
                 assume the computing medium was solely dedicated to the
                 task of controlling a plant, for example, the control
                 algorithm being implemented in software on a dedicated
                 Digital Signal Processor (DSP), or implemented in
                 hardware using a simple dedicated Programmable Logic
                 Device (PLD). As time progressed, the drive to place
                 more system functionality in a single component
                 (reducing power, cost, and increasing reliability) has
                 made this assumption less often true. Thus, it has been
                 pointed out by some experts in the field of control
                 theory (e.g., {\AA}str{\"o}m) that those developing
                 control algorithms must take into account the effects
                 of running their algorithms on systems that will be
                 shared with other tasks. One aspect of the work
                 presented in this article is a hardware architecture
                 that allows control developers to maintain this
                 simplifying assumption. We focus specifically on the
                 Proportional-Integral-Derivative (PID) controller. An
                 on-chip coprocessor has been implemented that can scale
                 to support servicing hundreds of plants, while
                 maintaining microsecond-level response times, tight
                 deterministic control loop timing, and allowing the
                 main processor to service noncontrol tasks. In order to
                 control a plant, the controller needs information about
                 the plant's state. Typically this information is
                 obtained from sensors with which the plant has been
                 instrumented. There are a number of common computations
                 that may be performed on this sensor data before being
                 presented to the controller (e.g., averaging and
                 thresholding). Thus in addition to supporting PID
                 algorithms, we have developed a Sensor Processing Unit
                 (SPU) that off-loads these common sensor processing
                 tasks from the main processor. We have prototyped our
                 ideas using Field Programmable Gate Array (FPGA)
                 technology. Through our experimental results, we show
                 our PID execution unit gives orders of magnitude
                 improvement in response time when servicing many
                 plants, as compared to a standard general software
                 implementation. We also show that the SPU scales much
                 better than a general software implementation. In
                 addition, these execution units allow the simplifying
                 assumption of dedicated computing medium to hold for
                 control algorithm development.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Beldianu:2013:MBV,
  author = {Spiridon F. Beldianu and Sotirios G. Ziavras},
  title = {Multicore-based vector coprocessor sharing for
    performance and energy gains},
  journal = j-TECS,
  volume = {13},
  number = {2},
  pages = {17:1--17:??},
  month = sep,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2514641.2514644},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Sep 27 18:13:13 MDT 2013},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {For most of the applications that make use of a
    dedicated vector coprocessor, its resources are not
    highly utilized due to the lack of sustained data
    parallelism which often occurs due to vector-length
    variations in dynamic environments. The motivation of
    our work stems from: (a) the mandate for multicore
    designs to make efficient use of on-chip resources for
    low power and high performance; (b) the omnipresence of
    vector operations in high-performance scientific and
    emerging embedded applications; (c) the need to often
    handle a variety of vector sizes; and (d) vector
    kernels in application suites may have diverse
    computation needs. We present a robust design framework
    for vector coprocessor sharing in multicore
    environments that maximizes vector unit utilization and
    performance at substantially reduced energy costs. For
    our adaptive vector unit, which is attached to multiple
    cores, we propose three basic shared working policies
    that enforce coarse-grain, fine-grain, and vector-lane
    sharing. We benchmark these vector coprocessor sharing
    policies for a dual-core system and evaluate them using
    the floating-point performance, resource utilization,
    and power/energy consumption metrics. Benchmarking for
    FIR filtering, FFT, matrix multiplication, and LU
    factorization shows that these coprocessor sharing
    policies yield high utilization and performance with
    low energy costs. The proposed policies provide 1.2--2
    speedups and reduce the energy needs by about 50\% as
    compared to a system having a single core with an
    attached vector coprocessor. With the performance
    expressed in clock cycles, the sharing policies
    demonstrate 3.62--7.92 speedups compared to optimized
    Xeon runs. We also introduce performance and empirical
    power models that can be used by the runtime system to
    estimate the effectiveness of each policy in a hybrid
    system that can simultaneously implement this suite of
    shared coprocessor policies.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {17},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Jungeblut:2013:SAO,
  author = {Thorsten Jungeblut and Boris H{\"u}bener and Mario
    Porrmann and Ulrich R{\"u}ckert},
  title = {A systematic approach for optimized bypass
    configurations for application-specific embedded
    processors},
  journal = j-TECS,
  volume = {13},
  number = {2},
  pages = {18:1--18:??},
  month = sep,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2514641.2514645},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Sep 27 18:13:13 MDT 2013},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The diversity of today's mobile applications requires
    embedded processor cores with a high resource
    efficiency, that means, the devices should provide a
    high performance at low area requirements and power
    consumption. The fine-grained parallelism supported by
    multiple functional units of VLIW architectures offers
    a high throughput at reasonable low clock frequencies
    compared to single-core RISC processors. To efficiently
    utilize the processor pipeline, common system
    architectures have to cope with data hazards due to
    data dependencies between consecutive operations. On
    the one hand, such hazards can be resolved by complex
    forwarding circuits (i.e., a pipeline bypass) which
    forward intermediate results to a subsequent
    instruction. On the other hand, the pipeline bypass can
    strongly affect or even dominate the total resource
    requirements and degrade the maximum clock frequency.
    In this work the CoreVA VLIW architecture is used for
    the development and the analysis of
    application-specific bypass configurations. It is shown
    that many paths of a comprehensive bypass system are
    rarely used and may not be required for certain
    applications. For this reason, several strategies have
    been implemented to enhance the efficiency of the total
    system by introducing application-specific bypass
    configurations. The configuration can be carried out
    statically by only implementing required paths or at
    runtime by dynamically reconfiguring the hardware. An
    algorithm is proposed which derives an optimized
    configuration by iteratively disabling single bypass
    paths. The adaptation of these application-specific
    bypass configurations allows for a reduction of the
    critical path by 26\%. As a result, the execution time
    and energy requirements could be reduced by up to
    21.5\%. Using Dynamic Frequency Scaling (DFS) and
    dynamic deactivation/reactivation of bypass paths
    allows for a runtime reconfiguration of the bypass
    system. This ensures the highest efficiency while
    processing varying applications.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {18},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Theodoropoulos:2013:CAM,
  author = {Dimitris Theodoropoulos and Georgi Kuzmanov and Georgi
    Gaydadjiev},
  title = {Custom architecture for multicore audio beamforming
    systems},
  journal = j-TECS,
  volume = {13},
  number = {2},
  pages = {19:1--19:??},
  month = sep,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2514641.2514646},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Sep 27 18:13:13 MDT 2013},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The audio Beamforming (BF) technique utilizes
    microphone arrays to extract acoustic sources recorded
    in a noisy environment. In this article, we propose a
    new approach for rapid development of multicore BF
    systems. Research on literature reveals that the
    majority of such experimental and commercial audio
    systems are based on desktop PCs, due to their
    high-level programming support and potential of rapid
    system development. However, these approaches introduce
    performance bottlenecks, excessive power consumption,
    and increased overall cost. Systems based on DSPs
    require very low power, but their performance is still
    limited. Custom hardware solutions alleviate the
    aforementioned drawbacks, however, designers primarily
    focus on performance optimization without providing a
    high-level interface for system control and test. In
    order to address the aforementioned problems, we
    propose a custom platform-independent architecture for
    reconfigurable audio BF systems. To evaluate our
    proposal, we implement our architecture as a
    heterogeneous multicore reconfigurable processor and
    map it onto FPGAs. Our approach combines the software
    flexibility of General-Purpose Processors (GPPs) with
    the computational power of multicore platforms. In
    order to evaluate our system we compare it against a BF
    software application implemented to a low-power Atom
    330, a middle-ranged Core2 Duo, and a high-end Core i3.
    Experimental results suggest that our proposed solution
    can extract up to 16 audio sources in real time under a
    16-microphone setup. In contrast, under the same setup,
    the Atom 330 cannot extract any audio sources in real
    time, while the Core2 Duo and the Core i3 can process
    in real time only up to 4 and 6 sources respectively.
    Furthermore, a Virtex4-based BF system consumes more
    than an order less energy compared to the
    aforementioned GPP-based approaches.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {19},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Mariani:2013:DSE,
  author = {Giovanni Mariani and Gianluca Palermo and Vittorio
    Zaccaria and Cristina Silvano},
  title = {Design-space exploration and runtime resource
    management for multicores},
  journal = j-TECS,
  volume = {13},
  number = {2},
  pages = {20:1--20:??},
  month = sep,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2514641.2514647},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Sep 27 18:13:13 MDT 2013},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Application-specific multicore architectures are
    usually designed by using a configurable platform in
    which a set of parameters can be tuned to find the best
    trade-off in terms of the selected figures of merit
    (such as energy, delay, and area). This multi-objective
    optimization phase is called Design-Space Exploration
    (DSE). Among the design-time (hardware) configurable
    parameters we can find the memory subsystem
    configuration (such as cache size and associativity)
    and other architectural parameters such as the
    instruction-level parallelism of the system processors.
    Among the runtime (software) configurable parameters we
    can find the degree of task-level parallelism
    associated with each application running on the
    platform. The contribution of this article is twofold;
    first, we introduce an evolutionary (NSGA-II-based)
    methodology for identifying a hardware configuration
    which is robust with respect to applications and
    corresponding datasets. Second, we introduce a novel
    runtime heuristic that exploits design-time identified
    operating points to provide guaranteed throughput to
    each application. Experimental results show that the
    design-time/runtime combined approach improves the
    runtime performance of the system with respect to
    existing reference techniques, while meeting the
    overall power budget.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {20},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Kim:2013:MPE,
  author = {Yooseong Kim and Aviral Shrivastava},
  title = {Memory performance estimation of {CUDA} programs},
  journal = j-TECS,
  volume = {13},
  number = {2},
  pages = {21:1--21:??},
  month = sep,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2514641.2514648},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Sep 27 18:13:13 MDT 2013},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/pvm.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {CUDA has successfully popularized GPU computing, and
    GPGPU applications are now used in various embedded
    systems. The CUDA programming model provides a simple
    interface to program on GPUs, but tuning GPGPU
    applications for high performance is still quite
    challenging. Programmers need to consider numerous
    architectural details, and small changes in source
    code, especially on the memory access pattern, can
    affect performance significantly. This makes it very
    difficult to optimize CUDA programs. This article
    presents CuMAPz, which is a tool to analyze and compare
    the memory performance of CUDA programs. CuMAPz can
    help programmers explore different ways of using shared
    and global memories, and optimize their program for
    efficient memory behavior. CuMAPz models several
    memory-performance-related factors: data reuse, global
    memory access coalescing, global memory latency hiding,
    shared memory bank conflict, channel skew, and branch
    divergence. Experimental results show that CuMAPz can
    accurately estimate performance with correlation
    coefficient of 0.96. By using CuMAPz to explore the
    memory access design space, we could improve the
    performance of our benchmarks by 30\% more than the
    previous approach [Hong and Kim 2010].},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {21},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Stamoulias:2013:PAK,
  author = {Ioannis Stamoulias and Elias S. Manolakos},
  title = {Parallel architectures for the {kNN} classifier ---
    design of soft {IP} cores and {FPGA} implementations},
  journal = j-TECS,
  volume = {13},
  number = {2},
  pages = {22:1--22:??},
  month = sep,
  year = {2013},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2514641.2514649},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Sep 27 18:13:13 MDT 2013},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We designed a variety of k-nearest-neighbor parallel
    architectures for FPGAs in the form of parameterizable
    soft IP cores. We show that they can be used to solve
    large classification problems with thousands of
    training vectors, or thousands of vector dimensions
    using a single FPGA, and achieve very high throughput.
    They can be used to flexibly synthesize architectures
    that also cover: 1NN classification (vector
    quantization), multishot queries (with different $k$),
    LOOCV cross-validation, and compare favorably to GPU
    implementations. To the best of our knowledge this is
    the first attempt to design flexible IP cores for the
    popular kNN classifier.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {22},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Huang:2013:ASP,
  author =       "Chen Huang and Frank Vahid and Tony Givargis",
  title =        "Automatic synthesis of physical system differential
                 equation models to a custom network of general
                 processing elements on {FPGAs}",
  journal =      j-TECS,
  volume =       "13",
  number =       "2",
  pages =        "23:1--23:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2514641.2514650",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Sep 27 18:13:13 MDT 2013",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Fast execution of physical system models has various
                 uses, such as simulating physical phenomena or
                 real-time testing of medical equipment. Physical system
                 models commonly consist of thousands of differential
                 equations. Solving such equations using software on
                 microprocessor devices may be slow. Several past
                 efforts implement such models as parallel circuits on
                 special computing devices called Field-Programmable
                 Gate Arrays (FPGAs), demonstrating large speedups due
                 to the excellent match between the massive fine-grained
                 local communication parallelism common in physical
                 models and the fine-grained parallel compute elements
                 and local connectivity of FPGAs. However, past
                 implementation efforts were mostly manual or ad hoc. We
                 present the first method for automatically converting a
                 set of ordinary differential equations into circuits on
                 FPGAs. The method uses a general Processing Element
                 (PE) that we developed, designed to quickly solve a set
                 of ordinary differential equations while using few FPGA
                 resources. The method instantiates a network of general
                 PEs, partitions equations among the PEs to minimize
                 communication, generates each PE's custom program,
                 creates custom connections among PEs, and maintains
                 synchronization of all PEs in the network. Our
                 experiments show that the method generates a 400-PE
                 network on a commercial FPGA that executes four
                 different models on average $ 15 \times $ faster than a
                 3 GHz Intel processor, $ 30 \times $ faster than a
                 commercial 4-core ARM, $ 14 \times $ faster than a
                 commercial 6-core Texas Instruments digital signal
                 processor, and $ 4.4 \times $ faster than an NVIDIA
                 336-core graphics processing unit. We also show that
                 the FPGA-based approach is reasonably cost effective
                 compared to using the other platforms. The method
                 yields $ 2.1 \times $ faster circuits than a commercial
                 high-level synthesis tool that uses the traditional
                 method for converting behavior to circuits, while using
                 $ 2 \times $ fewer lookup tables, $ 2 \times $ fewer
                 hardcore multiplier (DSP) units, though $ 3.5 \times $
                 more block RAM due to being programmable. Furthermore,
                 the method does not just generate a single fastest
                 design, but generates a range of designs that trade off
                 size and performance, by using different numbers of
                 PEs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Canis:2013:LOS,
  author =       "Andrew Canis and Jongsok Choi and Mark Aldham and
                 Victor Zhang and Ahmed Kammoona and Tomasz Czajkowski
                 and Stephen D. Brown and Jason H. Anderson",
  title =        "{LegUp}: an open-source high-level synthesis tool for
                 {FPGA}-based processor\slash accelerator systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "2",
  pages =        "24:1--24:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2514740",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Sep 27 18:13:13 MDT 2013",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "It is generally accepted that a custom hardware
                 implementation of a set of computations will provide
                 superior speed and energy efficiency relative to a
                 software implementation. However, the cost and
                 difficulty of hardware design is often prohibitive, and
                 consequently, a software approach is used for most
                 applications. In this article, we introduce a new
                 high-level synthesis tool called LegUp that allows
                 software techniques to be used for hardware design.
                 LegUp accepts a standard C program as input and
                 automatically compiles the program to a hybrid
                 architecture containing an FPGA-based MIPS soft
                 processor and custom hardware accelerators that
                 communicate through a standard bus interface. In the
                 hybrid processor/accelerator architecture, program
                 segments that are unsuitable for hardware
                 implementation can execute in software on the
                 processor. LegUp can synthesize most of the C language
                 to hardware, including fixed-sized multidimensional
                 arrays, structs, global variables, and pointer
                 arithmetic. Results show that the tool produces
                 hardware solutions of comparable quality to a
                 commercial high-level synthesis tool. We also give
                 results demonstrating the ability of the tool to
                 explore the hardware/software codesign space by varying
                 the amount of a program that runs in software versus
                 hardware. LegUp, along with a set of benchmark C
                 programs, is open source and freely downloadable,
                 providing a powerful platform that can be leveraged for
                 new research on a wide range of high-level synthesis
                 topics.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Papakonstantinou:2013:ECC,
  author =       "Alexandros Papakonstantinou and Karthik Gururaj and
                 John A. Stratton and Deming Chen and Jason Cong and
                 Wen-Mei W. Hwu",
  title =        "Efficient compilation of {CUDA} kernels for
                 high-performance computing on {FPGAs}",
  journal =      j-TECS,
  volume =       "13",
  number =       "2",
  pages =        "25:1--25:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2514641.2514652",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Sep 27 18:13:13 MDT 2013",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The rise of multicore architectures across all
                 computing domains has opened the door to heterogeneous
                 multiprocessors, where processors of different compute
                 characteristics can be combined to effectively boost
                 the performance per watt of different application
                 kernels. GPUs, in particular, are becoming very popular
                 for speeding up compute-intensive kernels of
                 scientific, imaging, and simulation applications. New
                 programming models that facilitate parallel processing
                 on heterogeneous systems containing GPUs are spreading
                 rapidly in the computing community. By leveraging these
                 investments, the developers of other accelerators have
                 an opportunity to significantly reduce the programming
                 effort by supporting those accelerator models already
                 gaining popularity. In this work, we adapt one such
                 language, the CUDA programming model, into a new FPGA
                 design flow called FCUDA, which efficiently maps the
                 coarse- and fine-grained parallelism exposed in CUDA
                 onto the reconfigurable fabric. Our CUDA-to-FPGA flow
                 employs AutoPilot, an advanced high-level synthesis
                 tool (available from Xilinx) which enables
                 high-abstraction FPGA programming. FCUDA is based on a
                 source-to-source compilation that transforms the SIMT
                 (Single Instruction, Multiple Thread) CUDA code into
                 task-level parallel C code for AutoPilot. We describe
                 the details of our CUDA-to-FPGA flow and demonstrate
                 the highly competitive performance of the resulting
                 customized FPGA multicore accelerators. To the best of
                 our knowledge, this is the first CUDA-to-FPGA flow to
                 demonstrate the applicability and potential advantage
                 of using the CUDA programming model for
                 high-performance computing in FPGAs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Editors:2013:ISS,
  author =       "{Editors}",
  title =        "Introduction to the special section on
                 {ESTIMedia'10}",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "26:1--26:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536748",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jia:2013:SLI,
  author =       "Zai Jian Jia and Tom{\'a}s Bautista and Antonio
                 N{\'u}{\~n}ez and Andy D. Pimentel and Mark Thompson",
  title =        "A system-level infrastructure for multidimensional
                 {MP-SoC} design space co-exploration",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "27:1--27:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536749",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present a flexible and extensible
                 system-level MP-SoC design space exploration (DSE)
                 infrastructure, called NASA. This highly modular
                 framework uses well-defined interfaces to easily
                 integrate different system-level simulation tools as
                 well as different combinations of search strategies in
                 a simple plug-and-play fashion. Moreover, NASA deploys
                 a so-called dimension-oriented DSE approach, allowing
                 designers to configure the appropriate number of,
                 well-tuned and possibly different, search algorithms to
                 simultaneously co-explore the various design space
                 dimensions. As a result, NASA provides a flexible and
                 re-usable framework for the systematic exploration of
                 the multidimensional MP-SoC design space, starting from
                 a set of relatively simple user specifications. To
                 demonstrate the capabilities of the NASA framework and
                 to illustrate its distinct aspects, we also present
                 several DSE experiments in which, for example, we
                 compare NASA configurations using a single search
                 algorithm for all design space dimensions to
                 configurations using a separate search algorithm per
                 dimension. These proof-of-concept experiments indicate
                 that the latter multidimensional co-exploration can
                 find better design points and evaluates a higher
                 diversity of design alternatives as compared to the
                 more traditional approach of using a single search
                 algorithm for all dimensions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nadezhkin:2013:AGP,
  author =       "Dmitry Nadezhkin and Hristo Nikolov and Todor
                 Stefanov",
  title =        "Automated generation of polyhedral process networks
                 from affine nested-loop programs with dynamic loop
                 bounds",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "28:1--28:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536750",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The Process Networks (PNs) is a suitable parallel
                 model of computation (MoC) used to specify embedded
                 streaming applications in a parallel form facilitating
                 the efficient mapping onto embedded parallel execution
                 platforms. Unfortunately, specifying an application
                 using a parallel MoC is a very difficult and highly
                 error-prone task. To overcome the associated
                 difficulties, we have developed the pn compiler, which
                 derives specific Polyhedral Process Networks (PPN)
                 parallel specifications from sequential static affine
                 nested loop programs (SANLPs). However, there are many
                 applications, for example, multimedia applications
                 (MPEG coders/decoders, smart cameras, etc.) that have
                 adaptive and dynamic behavior which cannot be expressed
                 as SANLPs. Therefore, in order to handle dynamic
                 multimedia applications, in this article we address the
                 important question whether we can relax some of the
                 restrictions of the SANLPs while keeping the ability to
                 perform compile-time analysis and to derive PPNs.
                 Achieving this would significantly extend the range of
                 applications that can be parallelized in an automated
                 way. The main contribution of this article is a first
                 approach for automated translation of affine nested
                 loop programs with dynamic loop bounds into
                 input-output equivalent Polyhedral Process Networks. In
                 addition, we present a method for analyzing the
                 execution overhead introduced in the PPNs derived from
                 programs with dynamic loop bounds. The presented
                 automated translation approach has been evaluated by
                 deriving a PPN parallel specification from a real-life
                 application called Low Speed Obstacle Detection (LSOD)
                 used in the smart cameras domain. By executing the
                 derived PPN, we have obtained results which indicate
                 that the approach we present in this article
                 facilitates efficient parallel implementations of
                 sequential nested loop programs with dynamic loop
                 bounds. That is, our approach reveals the possible
                 parallelism available in such applications, which
                 allows for the utilization of multiple cores in an
                 efficient way.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2013:AMC,
  author =       "Yulei Wu and Geyong Min and Dakai Zhu and Laurence T.
                 Yang",
  title =        "An analytical model for on-chip interconnects in
                 multimedia embedded systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "29:1--29:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536751",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The traffic pattern has significant impact on the
                 performance of network-on-chip. Many recent studies
                 have shown that multimedia applications can be
                 supported in on-chip interconnects. Driven by the
                 motivation of evaluating on-chip interconnects in
                 multimedia embedded systems, a new analytical model is
                 proposed to investigate the performance of the fat-tree
                 based on-chip interconnection network under bursty
                 multimedia traffic and nonuniform message destinations.
                 Extensive simulation experiments are conducted to
                 validate the accuracy of the model, which is then
                 adopted as a cost-efficient tool to investigate the
                 effects of bursty multimedia traffic with nonuniform
                 destinations on the network performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Che:2013:SSD,
  author =       "Weijia Che and Karam S. Chatha",
  title =        "Scheduling of synchronous data flow models onto
                 scratchpad memory-based embedded processors",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "30:1--30:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536752",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we propose a heuristic algorithm for
                 scheduling synchronous data flow (SDF) models on
                 scratch pad memory (SPM) enhanced processors with the
                 objective of minimizing its steady-state execution
                 time. The task involves partitioning the limited
                 on-chip SPM for actor code and data buffer, and
                 executing actors in such a manner that the physical SPM
                 is time shared with different actors and buffers
                 (formally defined as code overlay and data overlay,
                 respectively). In our setup, a traditional minimum
                 buffer schedule could result in very high code overlay
                 overhead and therefore may not be optimal. To reduce
                 the number of direct memory access (DMA) transfers,
                 actors need to be grouped into segments. Prefetching of
                 code and data overlay that overlaps DMA transfers with
                 actor executions also needs to be exploited. The
                 efficiency of our heuristic was evaluated by
                 compiling ten stream applications onto one synergistic
                 processing engine (SPE) of an IBM Cell Broadband
                 Engine. We compare the performance results of our
                 heuristic approach with a minimum buffer scheduling
                 approach and a 3-stage ILP approach, and show that our
                 heuristic is able to generate high quality solutions
                 with fast algorithm run time.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schmoll:2013:IFR,
  author =       "Florian Schmoll and Andreas Heinig and Peter Marwedel
                 and Michael Engel",
  title =        "Improving the fault resilience of an {H.264} decoder
                 using static analysis methods",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "31:1--31:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536753",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Fault tolerance rapidly evolves into one of the most
                 significant design objectives for embedded systems due
                 to reduced semiconductor structures and supply
                 voltages. However, resource-constrained systems cannot
                 afford traditional error correction for overhead and
                 cost reasons. New methods are required to sustain
                 acceptable service quality in case of errors while
                 avoiding crashes. We present a flexible fault-tolerance
                 approach that is able to select correction actions
                 depending on error semantics using application
                 annotations and static analysis approaches. We verify
                 the validity of our approach by analyzing the
                 vulnerability and improving the reliability of an H.264
                 decoder using flexible error handling.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Parmer:2013:PCC,
  author =       "Gabriel Parmer and Richard West",
  title =        "Predictable and configurable component-based
                 scheduling in the {Composite OS}",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "32:1--32:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536754",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents the design of user-level
                 scheduling hierarchies in the Composite component-based
                 system. The motivation for this is centered around the
                 design of a system that is both dependable and
                 predictable, and which is configurable to the needs of
                 specific applications. Untrusted application developers
                 can safely develop services and policies, that are
                 isolated in protection domains outside the kernel. To
                 ensure predictability, Composite enforces timing
                 control over user-space services. Moreover, it must
                 provide a means by which asynchronous events, such as
                 interrupts, are handled in a timely manner without
                 jeopardizing the system. Towards this end, we describe
                 the features of Composite that allow user-defined
                 scheduling policies to be composed for the purposes of
                 combined interrupt and task management. A significant
                 challenge arises from the need to synchronize access to
                 shared data structures (e.g., scheduling queues),
                 without allowing untrusted code to disable interrupts.
                 Additionally, efficient upcall mechanisms are needed to
                 deliver asynchronous event notifications in accordance
                 with policy-specific priorities, without undue recourse
                 to schedulers. We show how these issues are addressed
                 in Composite, by comparing several hierarchies of
                 scheduling policies, to manage both tasks and the
                 interrupts on which they depend. Studies show how it is
                 possible to implement guaranteed differentiated
                 services as part of the handling of I/O requests from a
                 network device while diminishing livelock.
                 Microbenchmarks indicate that the costs of implementing
                 and invoking user-level schedulers in Composite are on
                 par with, or less than, those in other systems, with
                 thread switches more than twice as fast as in Linux.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhou:2013:ARD,
  author =       "Bo Zhou and Xiaobo Sharon Hu and Danny Z. Chen and
                 Cedric X. Yu",
  title =        "Accelerating radiation dose calculation: a
                 multi-{FPGA} solution",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "33:1--33:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536755",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Remarkable progress has been made in the past few
                 decades in various aspects of radiation therapy (RT).
                 However, some of these promising technologies, such as
                 image-guided online replanning and arc therapy, rely
                 heavily on the availability of fast dose calculation.
                 In this article, based on a popular dose calculation
                 algorithm, the Collapsed-Cone Convolution/Superposition
                 (CCCS) algorithm, we present a multi-FPGA accelerator
                 to speed up radiation dose calculation. Our
                 performance-driven design strategy yields a fully
                 pipelined architecture, which includes a
                 resource-economic raytracing engine and
                 high-performance energy deposition pipeline. An
                 evaluation based on a set of clinical treatment
                 planning cases confirms that our FPGA design almost
                 fully utilizes the available external memory bandwidth
                 and achieves close to the best possible performance for
                 the CCCS algorithm while using less resource. Compared
                 with an existing FPGA design which aimed to accelerate
                 the identical algorithm, the proposed design achieved
                 1.9X speedup by providing better memory bandwidth
                 utilization (81.7\% vs. 43\% of the available external
                 memory bandwidth), higher working frequency (90 MHz vs.
                 70 MHz) and less logic resource usage (25K vs. 55K
                 logic cells). Furthermore, it obtains a speedup of 20X
                 over a commercial multithreaded software on a quad-core
                 system and 15X performance improvement over closely
                 related results. In terms of accuracy, the measured
                 less than 1\% statistical fluctuation indicates that
                 our solution is practical in real medical scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Furtado:2013:CON,
  author =       "Pedro Furtado and Jos{\'e} Cec{\'\i}lio",
  title =        "Configuration and operation of networked control
                 systems over heterogeneous {WSANs}",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "34:1--34:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536756",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "There have been both research and commercial advances
                 on applying Wireless Sensor and Actuator Networks (WSN)
                 in industrial premises. These have cost advantages
                 related to avoiding some cabled deployments. A possible
                 architecture involves a Networked Control System (NCS)
                 with many small WSN subnetworks, cabled nodes and
                 computer servers (e.g., servers, control stations). In
                 those systems individual sensor nodes can be
                 programmed, as opposed to cabled analog systems. We
                 investigate approaches for network-wide
                 configuration, where all nodes --- cabled or WSN
                 sensors --- can be configured with simplicity from a
                 single interface, instead of hand-coding or complex
                 configurations of individual nodes. We propose an
                 architecture and approach for configuration and
                 operation. Previous related proposals on middleware
                 involving WSNs suffer from two major limitations: they
                 either program within an individual WSN or configure
                 operation outside WSNs, wrapping data coming from WSN.
                 They do not allow configuring WSN and non-WSN nodes for
                 operation from a single interface. We discuss the
                 architecture and propose the NCSWSN configuration and
                 operation approach. We are applying this system in an
                 industrial testbed, therefore we test the approach and
                 also show user interfaces and results from the
                 deployment.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sanz:2013:SLM,
  author =       "Concepci{\'o}n Sanz and Jos{\'e} Ignacio G{\'o}mez and
                 Christian Tenllado and Manuel Prieto and Francky
                 Catthoor",
  title =        "System-level memory management based on statistical
                 variability compensation for frame-based applications",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "35:1--35:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536757",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Process variability and dynamic domains increase the
                 uncertainty of embedded systems and force designers to
                 apply pessimistic designs, which become unnecessarily
                 conservative and have a tremendous impact on both
                 performance and energy consumption. In this context,
                 developing uncertainty-aware design methodologies that
                 take both variation at platform and at application
                 level into account becomes a must. These methodologies
                 should mitigate the effects derived from uncertainty,
                 avoiding worst-case assumptions. In this article we
                 propose a comprehensive methodology to tackle two forms
                 of uncertainty: (1) process variation on the memory
                 system, (2) application dynamism. A statistical model
                 has been developed to deal with variability derived
                 from fabrication process, whereas system scenarios are
                 selected to cope with dynamic domains. Both sources of
                 uncertainty are firstly tackled in combination at
                 design time, to be refined later, at setup. As a
                 result, at run time the platform can be successfully
                 adapted to the current application behaviour as well as
                 the current variations. Our simulations show that this
                 methodology provides significant energy savings while
                 still meeting strict timing constraints.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mohaqeqi:2013:ASR,
  author =       "Morteza Mohaqeqi and Mehdi Kargahi and Maryam
                 Dehghan",
  title =        "Adaptive scheduling of real-time systems cosupplied by
                 renewable and nonrenewable energy sources",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "36:1--36:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536758",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy management is an important issue in today's
                 real-time systems due to the high costs of energy
                 supplying. Using renewable, like wave, wind, and solar
                 energy sources seem promising methods to address this
                 issue. However, because of the existing contrast
                 between the critical nature of hard real-time systems
                 and the unpredictable nature of renewable energies,
                 some supplementary energy source like electricity grid
                 or battery is needed. In this paper, we consider hard
                 real-time systems with two renewable and nonrenewable
                 energy sources. In order to reduce the costs, we
                 present two dynamic voltage scaling controllers to
                 minimize the energy attained from the latter source. In
                 order to handle variations of the environmental energy
                 and workload, the model predictive control approach is
                 employed. One nonlinear approach beside one fast linear
                 piecewise affine explicit controller are proposed. The
                 efficacies of the proposed approaches have been
                 investigated through extensive simulations. Comparisons
                 to an ideal clairvoyant controller as a baseline show
                 that, in the studied scenarios, the proposed
                 controllers guarantee at least 78\% of the baseline
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lo:2013:AGH,
  author =       "Chen-Kang Lo and Mao-Lin Li and Li-Chun Chen and
                 Yi-Shan Lu and Ren-Song Tsay and Hsu-Yao Huang and
                 Jen-Chieh Yeh",
  title =        "Automatic generation of high-speed accurate {TLM}
                 models for out-of-order pipelined bus",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "37:1--37:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536759",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Although pipelined/out-of-order (PL/OO) execution
                 features are commonly supported by the state-of-the-art
                 bus designs, no existing manual
                 Transaction-Level-Modeling (TLM) approaches can
                 effectively construct fast and accurate simulation
                 models for PL/OO buses. Mainly, the inherent high
                 design complexity of concurrent PL/OO behaviors makes
                 the manual approaches tedious and error-prone. To
                 tackle the complicated modeling task, this article
                 presents an automatic approach that performs systematic
                 abstraction and generation of fast-and-accurate
                 simulation models. The experimental results show that
                 our approach reduces 21 times modeling efforts, while
                 our generated models perform simulation an order of
                 magnitude faster than Cycle-Accurate models with the
                 same PL/OO transaction execution cycle counts
                 preserved.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2013:SBR,
  author =       "Jongeun Lee and Aviral Shrivastava",
  title =        "Software-based register file vulnerability reduction
                 for embedded processors",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "38:1--38:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536760",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Register File (RF) is extremely vulnerable to soft
                 errors, and traditional redundancy based schemes to
                 protect the RF are prohibitive not only because RF is
                 often in the timing critical path of the processor, but
                 also since it is one of the hottest blocks on the chip.
                 Software approaches would be ideal in this case, but
                 previous approaches based on instruction scheduling are
                 only moderately effective due to local scope. In this
                 article we present a compiler approach, based on
                 interprocedural program analysis, to reduce the
                 vulnerability of registers by temporarily writing live
                 variables to protected memory. We formulate the problem
                 as an integer linear programming problem and also
                 present a very efficient heuristic algorithm. Further
                 we present an iterative optimization method based on
                 Kernighan--Lin's graph partitioning algorithm. Our
                 experiments demonstrate that our proposed techniques
                 can reduce the vulnerability of a RF by 33 to 37\% on
                 average and up to 66\%, with a small 2\% increase in
                 runtime. In addition, our overhead reduction
                 optimization can effectively reduce the code size
                 overhead, by more than 40\% on average, to a mere 5 to
                 6\%, compared to highly optimized binaries.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Singh:2013:MCN,
  author =       "Anshul Singh and Arindam Basu and Keck-Voon Ling and
                 Vincent J. {Mooney III}",
  title =        "Models for characterizing noise based {PCMOS}
                 circuits",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "39:1--39:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536761",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Quick and accurate error-rate prediction of
                 Probabilistic CMOS (PCMOS) circuits is crucial for
                 their systematic design and performance evaluation.
                 While still in the early stage of research, PCMOS has
                 shown potential to drastically reduce energy
                 consumption at a cost of increased errors. Recently, a
                 methodology has been proposed which could predict the
                 error rates of cascade structures of blocks in PCMOS.
                 This methodology requires error rates of unique blocks
                 to predict the error rates of multiblock cascade
                 structures composed of these unique blocks. In this
                 article we present a new model for characterization of
                 probabilistic circuits/blocks and present a procedure
                 to find and characterize unique circuits/blocks. Unlike
                 prior approaches, our new model distinguishes distinct
                 filtering effects per output, thereby improving
                 prediction accuracy by an average of 95\% over the
                 prior art by Palem and coauthors. Furthermore, we show
                 two models where our new model with three stages is
                 18\% more accurate, on average, than our simpler
                 two-stage model. We apply our proposed models to Ripple
                 Carry Adders and Wallace Tree Multipliers and show that
                 using our models, the methodology of cascade structures
                 can predict error rates of PCMOS circuits with
                 reasonable accuracy (within 9\%) in PCMOS for uniform
                 voltages as well as multiple voltages. Finally, our
                 approach takes seconds of simulation time whereas using
                 HSPICE would take days of simulation time.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Anagnostopoulos:2013:PAD,
  author =       "Iraklis Anagnostopoulos and Jean-Michel Chabloz and
                 Ioannis Koutras and Alexandros Bartzas and Ahmed Hemani
                 and Dimitrios Soudris",
  title =        "Power-aware dynamic memory management on many-core
                 platforms utilizing {DVFS}",
  journal =      j-TECS,
  volume =       "13",
  number =       "1s",
  pages =        "40:1--40:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536747.2536762",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 9 11:30:05 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Today multicore platforms are already prevalent
                 solutions for modern embedded systems. In the future,
                 embedded platforms will have an even more increased
                 processor core count, composing many-core platforms. In
                 addition, applications are becoming more complex and
                 dynamic and try to efficiently utilize the amount of
                 available resources on the embedded platforms.
                 Efficient memory utilization is a key challenge for
                 application developers, especially since memory is a
                 scarce resource and often becomes the system's
                 bottleneck. To cope with this dynamism and achieve
                 better memory footprint utilization (low memory
                 fragmentation) application developers resort to the
                 usage of dynamic memory (heap) management techniques,
                 by allocating and deallocating data at runtime.
                 Moreover, overall power consumption is another key
                 challenge that needs to be taken into consideration.
                 Towards this, designers employ the usage of Dynamic
                 Voltage and Frequency Scaling (DVFS) mechanisms,
                 adapting to the application's computational demands at
                 runtime. In this article, we propose the combination of
                 dynamic memory management techniques with DVFS ones.
                 This is performed by integrating, within the memory
                 manager, runtime monitoring mechanisms that steer the
                 DVFS mechanisms to adjust clock frequency and voltage
                 supply based on heap performance. The proposed approach
                 has been evaluated on a distributed shared-memory
                 many-core platform composed of multiple LEON3
                 processors interconnected by a Network-on-Chip
                 infrastructure, supporting DVFS. Experimental results
                 show that by using the proposed method for monitoring
                 and applying DVFS mechanisms the power consumption
                 concerning dynamic memory management was reduced by
                 approximately 37\%. In addition we present the
                 trade-offs of the proposed approach. Last, by combining
                 the developed method with heap fragmentation-aware
                 dynamic memory managers, we achieve low heap
                 fragmentation values combined with low power
                 consumption.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Editors:2014:MMA,
  author =       "Yongcai Wang and Xiaohong Hao and Lei Song and Chenye
                 Wu and Yuexuan Wang and Changjun Hu and Lu Yu",
  title =        "Monitoring massive appliances by a minimal number of
                 smart meters",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "56:1--56:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544376",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a framework for deploying a
                 minimal number of smart meters to accurately track the
                 ON/OFF states of a massive number of electrical
                 appliances which exploits the sparseness feature of
                 simultaneous ON/OFF switching events of the massive
                 appliances. A theoretical bound on the least number of
                 required smart meters is studied by an entropy-based
                 approach, which qualifies the impact of meter
                 deployment strategies to the state tracking accuracy.
                 It motivates a meter deployment optimization algorithm
                 (MDOP) to minimize the number of meters while
                 satisfying given requirements to state tracking
                 accuracy. To accurately decode the real-time ON/OFF
                 states of appliances by the readings of meters, a fast
                 state decoding (FSD) algorithm based on the hidden
                 Markov model (HMM) is presented to track the state
                 sequence of each appliance for better accuracy.
                 Although traditional HMM needs $ O(t 2^{2 N}) $ time
                 complexity to conduct online sequence decoding, FSD
                 improves the complexity to $ O (t n^{U + 1}) $, where
                 $ n < N $ and $U$ is an upper bound of the simultaneous
                 switching events. Both MDOP and FSD are verified
                 extensively using simulations and real PowerNet data.
                 The results show that the meter deployment cost can be
                 saved by more than 80\% while still getting over 90\%
                 state tracking accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2014:EDF,
  author =       "Chenye Wu and Yiyu Shi and Soummya Kar",
  title =        "Exploring demand flexibility in heterogeneous
                 aggregators: an {LMP}-based pricing scheme",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "57:1--57:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544377",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the proposed penetration of electric vehicles and
                 advanced metering technology, the demand side is
                 foreseen to play a major role in flexible energy
                 consumption scheduling. On the other hand, the past
                 several years have witnessed utility companies' growing
                 interests to integrate more renewable energy resources.
                 These renewable resources, for example, wind or solar,
                 due to their intermittent nature, brought great
                 uncertainty to the power grid system. In this article,
                 we propose a mechanism that attempts to mitigate the
                 grid operational uncertainty induced by renewable
                 energies by properly exploiting demand flexibility with
                 the help of advanced smart-metering technology. To
                 address the challenge, we develop a novel locational
                 marginal price (LMP)-based pricing scheme that involves
                 active demand-side participation by casting the network
                 objective as a two-stage Stackelberg game between the
                 local grid operator and several aggregators. In
                 contrast to the conventional notion that generation
                 follows load, our game formulation provides more
                 flexibility for the operators and tries to provide
                 adequate incentives for the loads to follow the
                 (stochastic renewable) generation. We use the solution
                 concept of subgame perfect equilibrium to analyze the
                 resulting game. Subsequently, we discuss the optimal
                 real-time conventional capacity planning for the local
                 grid operator to achieve the minimal mismatch between
                 supply and demand with the wind power integration.
                 Finally, we assess our proposed scheme with field data.
                 The simulation results show that our proposed scheme
                 works reasonably well in the long term, even with
                 simple heuristics.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2014:ISS,
  author =       "Naehyuck Chang and Jian-Jia Chen",
  title =        "Introduction to the special section on
                 {ESTIMedia'11}",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "58:1--58:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544378",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Su:2014:RVP,
  author =       "Tzu-Hsiang Su and Hsiang-Jen Tsai and Keng-Hao Yang
                 and Po-Chun Chang and Tien-Fu Chen and Yi-Ting Zhao",
  title =        "Reconfigurable vertical profiling framework for the
                 {Android} runtime system",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "59:1--59:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544379",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "Dalvik virtual machine in the Android system creates a
                 profiling barrier between VM-space applications and
                 Linux user-space libraries. It is difficult for
                 existing profiling tools on the Android system to
                 definitively identify whether a bottleneck occurred in
                 the application level, the Linux user-space level, or
                 the Linux kernel level. Information barriers exist
                 between VM-space applications and Linux native analysis
                 tools due to runtime virtual machines' dynamic memory
                 allocation mechanism. Furthermore, traditional vertical
                 profiling tools targeted for Java virtual machines
                 cannot be simply applied on the Dalvik virtual machine
                 due to its unique design. The proposed
                 Reconfigurable Vertical Profiling Framework bridges the
                 information gap and streamlines the hardware-software
                 co-design process for the Android runtime system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Song:2014:POA,
  author =       "Wook Song and Yeseong Kim and Hakbong Kim and Jehun
                 Lim and Jihong Kim",
  title =        "Personalized optimization for {Android} smartphones",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "60:1--60:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544380",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As a highly personalized computing device, smartphones
                 present a unique new opportunity for system
                 optimization. For example, it is widely observed that a
                 smartphone user exhibits very regular application usage
                 patterns (although different users are quite different
                 in their usage patterns). User-specific high-level app
                 usage information, when properly managed, can provide
                 valuable hints for optimizing various system design
                 requirements. In this article, we describe the design
                 and implementation of a personalized optimization
                 framework for the Android platform that takes advantage
                 of user's application usage patterns in optimizing the
                 performance of the Android platform. Our optimization
                 framework consists of two main components, the
                 application usage modeling module and the usage
                 model-based optimization module. We have developed two
                 novel application usage models that correctly capture
                 typical smartphone user's application usage patterns.
                 Based on the application usage models, we have
                 implemented an app-launching experience optimization
                 technique which tries to minimize user-perceived
                 delays, extra energy consumption, and state loss when a
                 user launches apps. Our experimental results on the
                 Nexus S Android reference phones show that our proposed
                 optimization technique can avoid unnecessary
                 application restarts by up to 78.4\% over the default
                 LRU-based policy of the Android platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "60",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mirzoyan:2014:PVA,
  author =       "Davit Mirzoyan and Benny Akesson and Kees Goossens",
  title =        "Process-variation-aware mapping of best-effort and
                 real-time streaming applications to {MPSoCs}",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "61:1--61:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2490819",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As technology scales, the impact of process variation
                 on the maximum supported frequency (FMAX) of individual
                 cores in a multiprocessor system-on-chip (MPSoC)
                 becomes more pronounced. Task allocation without
                 variation-aware performance analysis can greatly
                 compromise performance and lead to a significant loss
                 in yield, defined as the percentage of manufactured
                 chips satisfying the application timing requirement. We
                 propose variation-aware task allocation for best-effort
                 and real-time streaming applications modeled as task
                 graphs. Our solutions are primarily based on the
                 throughput requirement, which is the most important
                 timing requirement in many real-time streaming
                 applications. The four main contributions of this work
                 are (1) distinguishing best-effort, firm real-time, and
                 soft real-time application classes, which require
                 different optimization criteria, (2) using dataflow
                 graphs, which are well suited for modeling and analysis
                 of streaming applications, we explicitly model task
                 execution both in terms of clock cycles (which is
                 independent of variation) and seconds (which does
                 depend on the variation of the resource), which we
                 connect by an explicit binding, (3) we present two
                 optimization approaches, which give different
                 improvement results at different costs, (4) we present
                 both exhaustive and heuristic algorithms that implement
                 the optimization approaches. Our variation-aware
                 mapping algorithms are tested on models of seven real
                 applications and are compared to mapping methods that
                 are unaware of hardware variation. Our results
                 demonstrate (1) improvements in the average performance
                 (3\% on average) for best-effort applications, and (2)
                 for firm real-time and soft real-time applications,
                 yield improvements of up to 27\% with an average of
                 15\%, showing the effectiveness of our approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jung:2014:HCO,
  author =       "Dong-Heon Jung and Soo-Mook Moon and Hyeong-Seok Oh",
  title =        "Hybrid compilation and optimization for {Java}-based
                 digital {TV} platforms",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "62:1--62:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2506257",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The Java-based software platform for interactive
                 digital TV (DTV) is composed of the system/middleware
                 class statically installed on the DTV set-top box and
                 the xlet applications dynamically downloaded from the
                 TV stations. The xlet application includes Java classes
                 and image/text files. The xlets are executed only when
                 the TV viewer initiates an interaction, even if the
                 xlets have been completely downloaded. To achieve high
                 performance on this dual-component, user-initiated
                 system, existing just-in-time (JIT) compilation and
                 optimization is not enough; instead, ahead-of-time and
                 idle-time compilation and optimization are also needed,
                 requiring a hybrid compilation and optimization
                 environment. We constructed such a hybrid environment
                 for a commercial DTV software platform and evaluated it
                 using real, on-air xlet applications. Our experimental
                 results show that the proposed hybrid environment can
                 improve the DTV Java performance by more than three
                 times, compared to the JIT-only environment, with
                 little change to other DTV behavior.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "62",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2014:RAC,
  author =       "Li-Pin Chang and Chen-Yi Wen",
  title =        "Reducing asynchrony in channel garbage-collection for
                 improving internal parallelism of multichannel
                 solid-state disks",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "63:1--63:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544383",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Solid-state disks use multichannel architectures to
                 boost their data transfer rates. Because realistic disk
                 workloads have numerous small write requests, modern
                 flash-storage devices adopt a write buffer and a set of
                 independent channels for better parallelism in serving
                 small write requests. When a channel is undergoing
                 garbage collection, it stops responding to inbound
                 write traffic and accumulates page data in the write
                 buffer. This results in contention for buffer space and
                 creates idle periods in channels. This study presents a
                 channel-management strategy, called garbage-collection
                 advancing, which allows early start of garbage
                 collection in channels for increasing the overlap among
                 channel activities of garbage collection and restoring
                 the balance of buffer-space usage among channels. This
                 study further introduces cycle filling, which is a
                 version of garbage-collection advancing tailored for
                 the operation model of flash planes. Experimental
                 results show that the proposed methods greatly
                 outperformed existing designs of multichannel systems
                 in terms of response and throughput. We also
                 successfully implemented the proposed methods in a real
                 solid-state disk and proved their feasibility in real
                 hardware.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "63",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2014:MRT,
  author =       "Zheng Li and Frank Lockom and Shangping Ren",
  title =        "Maintaining real-time application timing similarity
                 for defect-tolerant {NoC}-based many-core systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "64:1--64:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544384",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many-core Network-on-Chip (NoC) processors are
                 emerging in broad application areas, including those
                 with timing requirements, such as real-time and
                 multimedia applications. Typically, these processors
                 employ core-level backup to improve yield. However,
                 when defective cores are replaced by backup ones, the
                 NoC topology changes. Consequently, a fine-tuned
                 application based on timing parameters given by one
                 topology may not meet the expected timing behavior
                 under the new one. We first develop a metric to measure
                 timing similarity of an application on different NoC
                 topologies and then propose mixed binary quadratic
                 programming and greedy algorithms to reconfigure a
                 defect-tolerant many-core NoC.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ahmed:2014:TSA,
  author =       "Masud Ahmed and Nathan Fisher",
  title =        "Tractable schedulability analysis and resource
                 allocation for real-time multimodal systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "65:1--65:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544385",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Real-time multimedia subsystems often require support
                 for switching between different resource and
                 application execution modes. To ensure that timing
                 constraints are not violated during or after a
                 subsystem mode change, real-time schedulability
                 analysis is required. However, existing time-efficient
                 multimode schedulability analysis techniques for
                 application-only mode changes are not appropriate for
                 subsystems that require changes in the resource
                 execution behavior (e.g., processors with dynamic power
                 modes). Furthermore, all existing multimode
                 schedulability analysis that handles both resource and
                 application mode changes is highly exponential and not
                 scalable for subsystems with a moderate or large number
                 of modes. As a result, the notion of resource
                 optimality is still unaddressed for real-time
                 multimodal systems. In this report, we first address
                 the lack of tractable schedulability analysis for such
                 subsystems by proposing a model for characterizing
                 multiple resource and application modes and by deriving
                 a sufficient schedulability test that has
                 pseudo-polynomial time complexity. Finally, we propose
                 an algorithm which leverages this pseudo-polynomial
                 schedulability analysis to optimize the resource usages
                 (e.g., to minimize peak-power load) of a multimodal
                 real-time system. Simulation results show that our
                 proposed algorithms for schedulability analysis and
                 resource allocation, when compared with
                 previously-proposed approaches, require significantly
                 less time and are just as precise.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "65",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Balani:2014:DPF,
  author =       "Rahul Balani and Lucas F. Wanner and Mani B.
                 Srivastava",
  title =        "Distributed programming framework for fast iterative
                 optimization in networked cyber-physical systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "66:1--66:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544386",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Large-scale coordination and control problems in
                 cyber-physical systems are often expressed within the
                 networked optimization model. While significant
                 advances have taken place in optimization techniques,
                 their widespread adoption in practical implementations
                 has been impeded by the complexity of internode
                 coordination and lack of programming support for the
                 same. Currently, application developers build their own
                 elaborate coordination mechanisms for synchronized
                 execution and coherent access to shared resources via
                 distributed and concurrent controller processes.
                 However, they typically tend to be error prone and
                 inefficient due to tight constraints on application
                 development time and cost. This is unacceptable in many
                 CPS applications, as it can result in expensive and
                 often irreversible side-effects in the environment due
                 to inaccurate or delayed reaction of the control
                 system. This article explores the design of a
                 distributed shared memory (DSM) architecture that
                 abstracts the details of internode coordination. It
                 simplifies application design by transparently managing
                 routing, messaging, and discovery of nodes for coherent
                 access to shared resources. Our key contribution is the
                 design of provably correct locality-sensitive
                 synchronization mechanisms that exploit the spatial
                 locality inherent in actuation to drive faster and
                 scalable application execution through opportunistic
                 data parallel operation. As a result, applications
                 encoded in the proposed Hotline Application Programming
                 Framework are error free, and in many scenarios,
                 exhibit faster reactions to environmental events over
                 conventional implementations. Relative to our prior
                 work, this article extends Hotline with a new
                 locality-sensitive coordination mechanism for improved
                 reaction times and two tunable iteration control
                 schemes for lower message costs. Our extensive
                 evaluation demonstrates that realistic performance and
                 cost of applications are highly sensitive to the
                 prevalent deployment, network, and environmental
                 characteristics. This highlights the importance of
                 Hotline, which provides user-configurable options to
                 trivially tune these metrics and thus affords time to
                 the developers for implementing, evaluating, and
                 comparing multiple algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Brandt:2014:PCS,
  author =       "Jens Brandt and Klaus Schneider and Yu Bai",
  title =        "Passive code in synchronous programs",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "67:1--67:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544387",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The synchronous model of computation requires that in
                 every step, inputs are read and outputs are
                 synchronously computed as the reaction of the program.
                 In addition, all internal variables are updated in
                 parallel even though not all of these values might be
                 required for the current and the future reaction steps.
                 To avoid unnecessary computations, we present a
                 compile-time optimization procedure that computes for
                 every variable a condition that determines whether its
                 value is required for current or future computations.
                 In this sense, our optimizations allow us to identify
                 passive code that can be disabled to avoid unnecessary
                 computations and therefore to reduce the reaction time
                 of programs or their energy consumption.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gu:2014:AES,
  author =       "Yu Gu and Liang He and Ting Zhu and Tian He",
  title =        "Achieving energy-synchronized communication in
                 energy-harvesting wireless sensor networks",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "68:1--68:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544388",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With advances in energy-harvesting techniques, it is
                 now feasible to build sustainable sensor networks to
                 support long-term applications. Unlike battery-powered
                 sensor networks, the objective of sustainable sensor
                 networks is to effectively utilize a continuous stream
                 of ambient energy. Instead of pushing the limits of
                 energy conservation, we aim to design
                 energy-synchronized schemes that keep energy supplies
                 and demands in balance. Specifically, this work
                 presents Energy-Synchronized Communication (ESC) as a
                 transparent middleware between the network layer and
                 MAC layer that controls the amount and timing of RF
                 activity at receiving nodes. In this work, we first
                 derive a delay model for cross-traffic at individual
                 nodes, which reveals an interesting stair effect. This
                 effect allows us to design a localized energy
                 synchronization control with $ O(d^3) $ time complexity
                 that shuffles or adjusts the working schedule of a node
                 to optimize cross-traffic delays in the presence of
                 changing duty cycle budgets, where $ d $ is the node
                 degree in the network. Under different rates of energy
                 fluctuations, shuffle-based and adjustment-based
                 methods have different influences on logical
                 connectivity and cross-traffic delay, due to the
                 inconsistent views of working schedules among
                 neighboring nodes before schedule updates. We study the
                 trade-off between them and propose methods for updating
                 working schedules efficiently. To evaluate our work,
                 ESC is implemented on MicaZ nodes with two
                 state-of-the-art routing protocols. Both testbed
                 experiment and large-scale simulation results show
                 significant performance improvements over randomized
                 synchronization controls.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2014:CFE,
  author =       "Jinkyu Lee and Arvind Easwaran and Insik Shin",
  title =        "Contention-free executions for real-time
                 multiprocessor scheduling",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "69:1--69:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2494530",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A time slot is defined as contention-free if the
                 number of jobs with remaining executions in the slot is
                 no larger than the number of processors, or contending,
                 otherwise. Then an important property holds that in any
                 contention-free slot, all jobs with remaining
                 executions are guaranteed to be scheduled as long as
                 the scheduler is work-conserving. This article aims at
                 improving schedulability by utilizing the
                 contention-free slots. To achieve this, this article
                 presents a policy (called CF policy) that moves some
                 job executions from contending slots to contention-free
                 ones. This policy can be employed by any
                 work-conserving, preemptive scheduling algorithm, and
                 we show that any algorithm extended with this policy
                 dominates the original algorithm in terms of
                 schedulability. We also present improved schedulability
                 tests for algorithms that employ this policy, based on
                 the observation that interference from jobs is reduced
                 when their executions are postponed to contention-free
                 slots. Simulation results demonstrate that the CF
                 policy, incorporated into existing algorithms,
                 significantly improves schedulability of those existing
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2014:TMP,
  author =       "Huang Huang and Vivek Chaturvedi and Gang Quan and
                 Jeffrey Fan and Meikang Qiu",
  title =        "Throughput maximization for periodic real-time systems
                 under the maximal temperature constraint",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "70:1--70:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544390",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we study the problem of how to
                 maximize the throughput of a periodic real-time system
                 under a given peak temperature constraint. We assume
                 that different tasks in our system may have different
                 power and thermal characteristics. Two scheduling
                 approaches are presented. The first is built upon
                 processors that can be in either active or sleep mode.
                 By judiciously selecting tasks with different thermal
                 characteristics as well as alternating the processor's
                 active/sleep mode, the sleep period required to cool
                 down the processor is kept at a minimum level, and, as
                 the result, the throughput is maximized. We further
                 extend this approach for processors with dynamic
                 voltage/frequency scaling (DVFS) capability. Our
                 experiments on a large number of synthetic test cases
                 as well as real benchmark programs show that the
                 proposed methods not only consistently outperform the
                 existing approaches in terms of throughput
                 maximization, but also significantly improve the
                 feasibility of tasks when a more stringent temperature
                 constraint is imposed.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Elewi:2014:EET,
  author =       "Abdullah Elewi and Mohamed Shalan and Medhat Awadalla
                 and Elsayed M. Saad",
  title =        "Energy-efficient task allocation techniques for
                 asymmetric multiprocessor embedded systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "71:1--71:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544391",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Asymmetric multiprocessor systems are considered
                 power-efficient multiprocessor architectures.
                 Furthermore, efficient task allocation (partitioning)
                 can achieve more energy efficiency at these asymmetric
                 multiprocessor platforms. This article addresses the
                 problem of energy-aware static partitioning of periodic
                 real-time tasks on asymmetric multiprocessor
                 (multicore) embedded systems. The article formulates
                 the problem according to the Dynamic Voltage and
                 Frequency Scaling (DVFS) model supported by the
                 platform and shows that it is an NP-hard problem. Then,
                 the article outlines optimal reference partitioning
                 techniques for each case of DVFS model with suitable
                 assumptions. Finally, the article proposes
                 modifications to the traditional bin-packing techniques
                 and designs novel techniques taking into account the
                 DVFS model supported by the platform. All algorithms
                 and techniques are simulated and compared. The
                 simulation shows promising results, where the proposed
                 techniques reduced the energy consumption by 75\%
                 compared to traditional methods when DVFS is not
                 supported and by 50\% when per-core DVFS is supported
                 by the platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Das:2014:EAT,
  author =       "Anup Das and Akash Kumar and Bharadwaj Veeravalli",
  title =        "Energy-aware task mapping and scheduling for reliable
                 embedded computing systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "72:1--72:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544392",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Task mapping and scheduling are critical in minimizing
                 energy consumption while satisfying the performance
                 requirement of applications enabled on heterogeneous
                 multiprocessor systems. An area of growing concern for
                 modern multiprocessor systems is the increase in the
                 failure probability of one or more component
                 processors. This is especially critical for
                 applications where performance degradation (e.g.,
                 throughput) directly impacts the quality of service
                 requirement. This article proposes a design-time
                 (offline) multi-criterion optimization technique for
                 application mapping on embedded multiprocessor systems
                 to minimize energy consumption for all processor
                 fault-scenarios. A scheduling technique is then
                 proposed based on self-timed execution to minimize the
                 schedule storage and construction overhead at runtime.
                 Experiments conducted with synthetic and real
                 applications from streaming and nonstreaming domains on
                 heterogeneous MPSoCs demonstrate that the proposed
                 technique minimizes energy consumption by 22\% and
                 design space exploration time by $ 100 \times $, while
                 satisfying the throughput requirement for all processor
                 fault-scenarios. For scalable throughput applications,
                 the proposed technique achieves 30\% better throughput
                 per unit energy, compared to the existing techniques.
                 Additionally, the self-timed execution-based scheduling
                 technique minimizes schedule construction time by 95\%
                 and storage overhead by 92\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2014:STN,
  author =       "Xiaohang Wang and Mei Yang and Yingtao Jiang and Peng
                 Liu and Masoud Daneshtalab and Maurizio Palesi and
                 Terrence Mak",
  title =        "On self-tuning networks-on-chip for dynamic
                 network-flow dominance adaptation",
  journal =      j-TECS,
  volume =       "13",
  number =       "2s",
  pages =        "73:1--73:??",
  month =        jan,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2544375.2544393",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 28 17:34:43 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modern network-on-chip (NoC) systems are required to
                 handle complex runtime traffic patterns and
                 unprecedented applications. Data traffics of these
                 applications are difficult to fully comprehend at
                 design time so as to optimize the network design.
                 However, it has been discovered that the majority of
                 dataflows in a network are dominated by less than 10\%
                 of the specific pathways. In this article, we introduce
                 a method that is capable of identifying critical
                 pathways in a network at runtime and can then
                 dynamically reconfigure the network to optimize for
                 network performance subject to the identified dominated
                 flows. An online learning and analysis scheme is
                 employed to quickly discover the emerging dominated
                 traffic flows and provides a statistical traffic
                 prediction using regression analysis. The architecture
                 of a self-tuning network is also discussed which can be
                 reconfigured by setting up the identified
                 point-to-point paths for the dominance dataflows in
                 large traffic volumes. The merits of this new approach
                 are experimentally demonstrated using comprehensive NoC
                 simulations. Compared to the conventional network
                 architectures over a range of realistic applications,
                 the proposed self-tuning network approach can
                 effectively reduce the latency and power consumption by
                 as much as 25\% and 24\%, respectively. We also
                 evaluated the configuration time and additional
                 hardware cost. This new approach demonstrates the
                 capability of an adaptive NoC to handle more complex
                 and dynamic applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "73",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 41.
@article{Bournoutian:2013:AAA,
  author          = {Garo Bournoutian and Alex Orailoglu},
  title           = {Application-aware adaptive cache architecture for
                     power-sensitive mobile processors},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {41:1--41:??},
  articleno       = {41},
  month           = dec,
  year            = {2013},
  doi             = {https://doi.org/10.1145/2539036.2539037},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  acknowledgement = ack-nhfb,
  abstract        = {Today, mobile smartphones are expected to be able to
                     run the same complex, algorithm-heavy, memory-intensive
                     applications that were originally designed and coded
                     for general-purpose processors. All the while, it is
                     also expected that these mobile processors be
                     power-conscientious as well as of minimal area impact.
                     These devices pose unique usage demands of
                     ultra-portability but also demand an always-on,
                     continuous data access paradigm. As a result, this
                     dichotomy of continuous execution versus long battery
                     life poses a difficult challenge. This article explores
                     a novel approach to mitigating mobile processor power
                     consumption while abating any significant degradation
                     in execution speed. The concept relies on efficiently
                     leveraging both compile-time and runtime application
                     memory behavior to intelligently target adjustments in
                     the cache to significantly reduce overall processor
                     power, taking into account both the dynamic and leakage
                     power footprint of the cache subsystem. The simulation
                     results show a significant reduction in power
                     consumption of approximately 13\% to 29\%, while only
                     incurring a nominal increase in execution time and
                     area.},
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 42.
@article{Zhou:2013:GOV,
  author          = {Bo Zhou and Kai Xiao and Danny Z. Chen and X. Sharon
                     Hu},
  title           = {{GPU}-optimized volume ray tracing for massive numbers
                     of rays in radiotherapy},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {42:1--42:??},
  articleno       = {42},
  month           = dec,
  year            = {2013},
  doi             = {https://doi.org/10.1145/2539036.2539038},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  acknowledgement = ack-nhfb,
  abstract        = {Ray tracing within a uniform grid volume is a
                     fundamental process invoked frequently by many
                     applications, especially radiation-dose calculation
                     methods in radiotherapy. However, the conflicting
                     features between the GPU memory architecture and the
                     memory-accessing patterns of volume ray tracing lead to
                     inefficient usage of GPU memory bandwidth and waste of
                     capability of modern GPUs. To improve the ray tracing
                     performance on GPU, we propose a lookup-table-based ray
                     tracing method which is specially optimized towards the
                     GPU memory system for processing a massive number of
                     rays. The proposed method is based on a key observation
                     that many of these applications normally involves a
                     massive number of rays, but their ray tracing may not
                     need to follow a specific execution order. Therefore,
                     we divide the 3D space into many regions (called
                     pyramids) and group together the rays falling into the
                     same pyramid. For each ray group, the volume is rotated
                     and resampled for their raytracing. This
                     divide-and-rotate strategy allows the memory access of
                     the ray tracing process to adopt a table-lookup
                     approach and leads to better memory coalescing on GPU.
                     Our proposed method was thoroughly evaluated in four
                     volume setups with randomly-generated rays. The
                     collapsed-cone convolution/superposition (CCCS) dose
                     calculation method is also implemented with/without the
                     proposed approach to verify the feasibility of our
                     method. Compared with the direct GPU implementation of
                     the popular 3DDDA algorithm, our method provides a
                     speedup in the range of 1.91--2.94X for the volume
                     settings we used. Major performance factors, including
                     ray origins, volume size, and pyramid size, are also
                     analyzed. The proposed technique was also found to be
                     able to give a speedup of 1.61--2.17X over the original
                     GPU implementation of the CCCS algorithm. Our
                     experiment results indicate that the proposed approach
                     is capable of offering better coalesced memory access
                     which eventually boosts the raytracing performance on
                     GPU. Moreover, our approach is conceptually simple and
                     can be readily included into various applications.},
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 43.
@article{Liang:2013:AAF,
  author          = {Yun Liang and Tulika Mitra},
  title           = {An analytical approach for fast and accurate design
                     space exploration of instruction caches},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {43:1--43:??},
  articleno       = {43},
  month           = dec,
  year            = {2013},
  doi             = {https://doi.org/10.1145/2539036.2539039},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  acknowledgement = ack-nhfb,
  abstract        = {Application-specific system-on-chip platforms create
                     the opportunity to customize the cache configuration
                     for optimal performance with minimal chip area.
                     Simulation, in particular trace-driven simulation, is
                     widely used to estimate cache hit rates. However,
                     simulation is too slow to be deployed in design space
                     exploration, especially when there are hundreds of
                     design points and the traces are huge. In this article,
                     we propose a novel analytical approach for design space
                     exploration of instruction caches. Given the program
                     control flow graph (CFG) annotated only with basic
                     block and control flow edge execution counts, we first
                     model the cache states at each point of the CFG in a
                     probabilistic manner. Then, we exploit the structural
                     similarities among related cache configurations to
                     estimate the cache hit rates for multiple cache
                     configurations in one pass. Experimental results
                     indicate that our analysis is 28--2,500 times faster
                     compared to the fastest known cache simulator while
                     maintaining high accuracy (0.2\% average error) in
                     estimating cache hit rates for a large set of popular
                     benchmarks. Moreover, compared to a state-of-the-art
                     cache design space exploration technique, our approach
                     achieves 304--8,086 times speedup and saves up to 62\%
                     (average 7\%) energy for the evaluated benchmarks.},
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 44.
@article{Bourke:2013:AES,
  author          = {Timothy Bourke and Arcot Sowmya},
  title           = {Analyzing an embedded sensor with timed automata in
                     {Uppaal}},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {44:1--44:??},
  articleno       = {44},
  month           = dec,
  year            = {2013},
  doi             = {https://doi.org/10.1145/2539036.2539040},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  acknowledgement = ack-nhfb,
  abstract        = {An infrared sensor is modeled and analyzed in Uppaal.
                     The sensor typifies the sort of component that
                     engineers regularly integrate into larger systems by
                     writing interface hardware and software. In all, three
                     main models are developed. In the first model, the
                     timing diagram of the sensor is interpreted and modeled
                     as a timed safety automaton. This model serves as a
                     specification for the complete system. A second model
                     that emphasizes the separate roles of driver and sensor
                     is then developed. It is validated against the timing
                     diagram model using an existing construction that
                     permits the verification of timed trace inclusion, for
                     certain models, by reachability analysis (i.e., model
                     checking). A transmission correctness property is also
                     stated by means of an auxiliary automaton and shown to
                     be satisfied by the model. A third model is created
                     from an assembly language driver program, using a
                     direct translation from the instruction set of a
                     processor with simple timing behavior. This model is
                     validated against the driver component of the second
                     timing diagram model using the timed trace inclusion
                     validation technique. The approach and its limitations
                     offer insight into the nature and challenges of
                     programming in real time.},
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 45.
%%% Editorial note: the abstract's final sentence previously read
%%% ``real-word stream programs''; corrected to ``real-world'' (presumed
%%% typo --- confirm against the publisher's abstract if exact quotation
%%% matters).
@Article{Collins:2013:FFS,
  author =       "Rebecca L. Collins and Luca P. Carloni",
  title =        "Flexible filters in stream programs",
  journal =      j-TECS,
  volume =       "13",
  number =       "3",
  pages =        "45:1--45:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2539036.2539041",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Dec 18 19:07:39 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The stream-processing model is a natural fit for
                 multicore systems because it exposes the inherent
                 locality and concurrency of a program and highlights
                 its separable tasks for efficient parallel
                 implementations. We present flexible filters, a
                 load-balancing optimization technique for stream
                 programs. Flexible filters utilize the programmability
                 of the cores in order to improve the data-processing
                 throughput of individual bottleneck tasks by
                 ``borrowing'' resources from neighbors in the stream.
                 Our technique is distributed and scalable because all
                 runtime load-balancing decisions are based on
                 point-to-point handshake signals exchanged between
                 neighboring cores. Load balancing with flexible filters
                 increases the system-level processing throughput of
                 stream applications, particularly those with large
                 dynamic variations in the computational load of their
                 tasks. We empirically evaluate flexible filters in a
                 homogeneous multicore environment over a suite of five
                 real-world stream programs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 46.
@article{Hashemi:2013:TMF,
  author          = {Matin Hashemi and Mohammad H. Foroozannejad and Soheil
                     Ghiasi},
  title           = {Throughput-memory footprint trade-off in synthesis of
                     streaming software on embedded multiprocessors},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {46:1--46:??},
  articleno       = {46},
  month           = dec,
  year            = {2013},
  doi             = {https://doi.org/10.1145/2539036.2539042},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  acknowledgement = ack-nhfb,
  abstract        = {We study the trade-off between throughput and memory
                     footprint of embedded software that is synthesized from
                     acyclic static dataflow (task graph) specifications
                     targeting distributed memory multiprocessors. We
                     identify iteration overlapping as a knob in the
                     synthesis process by which one can trade application
                     throughput for its memory requirement. Given an initial
                     processor assignment and non-overlapped task schedule,
                     we formally present underlying properties of the
                     problem, such as constraints on a valid iteration
                     overlapping, maximum possible throughput, and minimum
                     memory footprint. Moreover, we develop an effective
                     algorithm for generation of a rich set of design points
                     that provide a range of trade-off options. Experimental
                     results on a number of applications and architectures
                     validate the effectiveness of our approach.},
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 47.
@article{Biswas:2013:RTS,
  author          = {Swarnendu Biswas and Rajib Mall and Manoranjan
                     Satpathy},
  title           = {A regression test selection technique for embedded
                     software},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {47:1--47:??},
  articleno       = {47},
  month           = dec,
  year            = {2013},
  doi             = {https://doi.org/10.1145/2539036.2539043},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  acknowledgement = ack-nhfb,
  abstract        = {The current approaches for regression test selection
                     of embedded programs are usually based on data- and
                     control-dependency analyses, often augmented with human
                     reasoning. Existing techniques do not take into account
                     additional execution dependencies which may exist among
                     code elements in such programs due to features such as
                     tasks, task deadlines, task precedences, and intertask
                     communications. In this context, we propose a
                     model-based regression test selection technique for
                     such programs. Our technique first constructs a graph
                     model of the program; the proposed graph model has been
                     designed to capture several characteristics of embedded
                     programs, such as task precedence order, priority,
                     intertask communication, timers, exceptions and
                     interrupt handlers, which we consider important for
                     regression-test selection. Our regression test
                     selection technique selects test cases based on an
                     analysis of the constructed graph model. We have
                     implemented our technique to realize a prototype tool.
                     The experimental results obtained using this tool show
                     that, on average, our approach selects about 28.33\%
                     more regression test cases than those selected by a
                     traditional approach. We observed that, on average,
                     36.36\% of the fault-revealing test cases were
                     overlooked by the existing regression test selection
                     technique.},
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 48.
%%% Both occurrences of the omega-regular math markup in the abstract are
%%% written with the hyphen directly after the closing dollar sign, so
%%% that no spurious space is typeset before ``-regular''.
@Article{Majumdar:2013:TRO,
  author =       "Rupak Majumdar and Elaine Render and Paulo Tabuada",
  title =        "A theory of robust omega-regular software synthesis",
  journal =      j-TECS,
  volume =       "13",
  number =       "3",
  pages =        "48:1--48:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2539036.2539044",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Dec 18 19:07:39 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A key property for systems subject to uncertainty in
                 their operating environment is robustness: ensuring
                 that unmodeled but bounded disturbances have only a
                 proportionally bounded effect upon the behaviors of the
                 system. Inspired by ideas from robust control and
                 dissipative systems theory, we present a formal
                 definition of robustness as well as algorithmic tools
                 for the design of optimally robust controllers for $
                 \omega $-regular properties on discrete transition
                 systems. Formally, we define metric automata ---
                 automata equipped with a metric on states --- and
                 strategies on metric automata which guarantee
                 robustness for $ \omega $-regular properties. We
                 present fixed-point algorithms to construct optimally
                 robust strategies in polynomial time. In contrast to
                 strategies computed by classical graph theoretic
                 approaches, the strategies computed by our algorithm
                 ensure that the behaviors of the controlled system
                 gracefully degrade under the action of disturbances;
                 the degree of degradation is parameterized by the
                 magnitude of the disturbance. We show an application of
                 our theory to the design of controllers that tolerate
                 infinitely many transient errors provided they occur
                 infrequently enough.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 49.
@article{You:2013:EAC,
  author          = {Yi-Ping You and Shen-Hong Wang},
  title           = {Energy-aware code motion for {GPU} shader processors},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {49:1--49:??},
  articleno       = {49},
  month           = dec,
  year            = {2013},
  doi             = {https://doi.org/10.1145/2539036.2539045},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  acknowledgement = ack-nhfb,
  abstract        = {Graphics processing units (GPUs) are now being widely
                     adopted in system-on-a-chip designs, and they are often
                     used in embedded systems for manipulating computer
                     graphics or even for general-purpose computation.
                     Energy management is of concern to both hardware and
                     software designers. In this article, we present an
                     energy-aware code-motion framework for a compiler to
                     generate concentrated accesses to input and output
                     (I/O) buffers inside a GPU. Our solution attempts to
                     gather the I/O buffer accesses into clusters, thereby
                     extending the time period during which the I/O buffers
                     are clock or power gated. We performed experiments in
                     which the energy consumption was simulated by
                     incorporating our compiler-analysis and code-motion
                     framework into an in-house compiler tool. The
                     experimental results demonstrated that our mechanisms
                     were effective in reducing the energy consumption of
                     the shader processor by an average of 13.1\% and
                     decreasing the energy-delay product by 2.2\%.},
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 50.
@article{Liu:2013:RAE,
  author          = {Tiantian Liu and Alex Orailoglu and Chun Jason Xue and
                     Minming Li},
  title           = {Register allocation for embedded systems to
                     simultaneously reduce energy and temperature on
                     registers},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {50:1--50:??},
  articleno       = {50},
  month           = dec,
  year            = {2013},
  doi             = {https://doi.org/10.1145/2539036.2539046},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  acknowledgement = ack-nhfb,
  abstract        = {Energy and thermal issues are two important concerns
                     for embedded system design. Diminished energy
                     dissipation leads to a longer battery life, while
                     reduced temperature hotspots decelerate the physical
                     failure mechanisms. The instruction fetch logic
                     associated with register access has a significant
                     contribution towards the total energy consumption.
                     Meanwhile, the register file has also been previously
                     shown to exhibit the highest temperature compared to
                     the rest of the components in an embedded processor.
                     Therefore, the optimization of energy and the
                     resolution of the thermal issue for register accesses
                     are of great significance. In this article, register
                     allocation techniques are studied to simultaneously
                     reduce energy consumption and heat buildup on register
                     accesses for embedded systems. Contrary to prevailing
                     intuition, we observe that optimizing energy and
                     optimizing temperature on register accesses conflict
                     with each other. We introduce a rotator hardware in the
                     instruction decoder to facilitate a balanced solution
                     for the two conflicting objectives. Algorithms for
                     register allocation and refinement are proposed based
                     on the access patterns and the effects of the rotator.
                     Experimental results show that the proposed algorithms
                     obtain notable improvements of energy and peak
                     temperature for embedded applications.},
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 51.
@article{Lizarraga:2013:DPF,
  author          = {Adrian Lizarraga and Roman Lysecky and Susan Lysecky
                     and Ann Gordon-Ross},
  title           = {Dynamic profiling and fuzzy-logic-based optimization
                     of sensor network platforms},
  journal         = j-TECS,
  volume          = {13},
  number          = {3},
  pages           = {51:1--51:??},
  articleno       = {51},
  month           = dec,
  year            = {2013},
  doi             = {https://doi.org/10.1145/2539036.2539047},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  bibdate         = {Wed Dec 18 19:07:39 MST 2013},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  acknowledgement = ack-nhfb,
  abstract        = {The commercialization of sensor-based platforms is
                     facilitating the realization of numerous sensor network
                     applications with diverse application requirements.
                     However, sensor network platforms are becoming
                     increasingly complex to design and optimize due to the
                     multitude of interdependent parameters that must be
                     considered. To further complicate matters, application
                     experts oftentimes are not trained engineers, but
                     rather biologists, teachers, or agriculturists who wish
                     to utilize the sensor-based platforms for various
                     domain-specific tasks. To assist both platform
                     developers and application experts, we present a
                     centralized dynamic profiling and optimization platform
                     for sensor-based systems that enables application
                     experts to rapidly optimize a sensor network for a
                     particular application without requiring extensive
                     knowledge of, and experience with, the underlying
                     physical hardware platform. In this article, we present
                     an optimization framework that allows developers to
                     characterize application requirements through
                     high-level design metrics and fuzzy-logic-based
                     optimization. We further analyze the benefits of
                     utilizing dynamic profiling information to eliminate
                     the guesswork of creating a ``good'' benchmark, present
                     several reoptimization evaluation algorithms used to
                     detect if re-optimization is necessary, and highlight
                     the benefits of the proposed dynamic optimization
                     framework compared to static optimization
                     alternatives.},
}

%%% ACM TECS, volume 13, number 3 (December 2013), article 52.
%%% Editorial note: in the abstract, the grid dimension is marked up as
%%% inline math (previously the bare text ``d -dimensional'' with a stray
%%% space), and the final sentence now ends with a period.
@Article{Ben-Asher:2013:BUV,
  author =       "Yosi Ben-Asher and Nadav Rotem",
  title =        "The benefits of using variable-length pipelined
                 operations in high-level synthesis",
  journal =      j-TECS,
  volume =       "13",
  number =       "3",
  pages =        "52:1--52:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2539036.2539048",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Dec 18 19:07:39 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Current high-level synthesis systems synthesize
                 arithmetic units of a fixed known number of stages, and
                 the scheduler mainly determines when units are
                 activated. We focus on scheduling techniques for the
                 high-level synthesis of pipelined arithmetic units
                 where the number of stages of these operations is a
                 free parameter of the synthesis. This problem is
                 motivated by the ability to automatically create
                 pipelined functional units, such as multipliers, with
                 different pipe lengths. These units have different
                 characteristics in terms of parallelism level, clock
                 latency, frequency, etc. This article presents the
                 Variable-length Pipeline Scheduler (VPS). The ability
                 to synthesize variable-length pipelined units expands
                 the known scheduling problem of high-level synthesis to
                 include a search for a minimal number of hardware units
                 (operations) and their desired number of stages. The
                 proposed search procedure is based on algorithms that
                 find a local minima in a $d$-dimensional grid, thus
                 avoiding the need to evaluate all possible points in
                 the space. We have implemented a C language compiler
                 for VPS targeting FPGAs. Our results demonstrate that
                 using variable-length pipeline units can reduce the
                 overall resource usage and improve the execution time
                 when synthesized onto an FPGA. The proposed search is
                 sufficiently fast, taking only a few seconds, allowing
                 an interactive mode of work. A comparison with xPilot
                 shows a significant saving of hardware resources while
                 maintaining comparable execution times of the resulting
                 circuits. This work is an extension of a previous paper
                 [Ben-Asher and Rotem 2008].",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "52",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2013:RDD,
  author =       "Yu-Ming Chang and Pi-Cheng Hsiu and Yuan-Hao Chang and
                 Che-Wei Chang",
  title =        "A resource-driven {DVFS} scheme for smart handheld
                 devices",
  journal =      j-TECS,
  volume =       "13",
  number =       "3",
  pages =        "53:1--53:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2539036.2539049",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Dec 18 19:07:39 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Reducing the energy consumption of the emerging genre
                 of smart handheld devices while simultaneously
                 maintaining mobile applications and services is a major
                 challenge. This work is inspired by an observation on
                 the resource usage patterns of mobile applications. In
                 contrast to existing DVFS scheduling algorithms and
                 history-based prediction techniques, we propose a
                 resource-driven DVFS scheme in which resource state
                 machines are designed to model the resource usage
                 patterns in an online fashion to guide DVFS. We have
                 implemented the proposed scheme on Android smartphones
                 and conducted experiments based on real-world
                 applications. The results are very encouraging and
                 demonstrate the efficacy of the proposed scheme.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "53",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kyrkou:2013:HAR,
  author =       "Christos Kyrkou and Christos Ttofis and Theocharis
                 Theocharides",
  title =        "A hardware architecture for real-time object detection
                 using depth and edge information",
  journal =      j-TECS,
  volume =       "13",
  number =       "3",
  pages =        "54:1--54:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2539036.2539050",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Dec 18 19:07:39 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Emerging embedded 3D vision systems for robotics and
                 security applications utilize object detection to
                 perform video analysis in order to intelligently
                 interact with their host environment and take
                 appropriate actions. Such systems have high performance
                 and high detection-accuracy demands, while requiring
                 low energy consumption, especially when dealing with
                 embedded mobile systems. However, there is a large
                 image search space involved in object detection,
                 primarily because of the different sizes in which an
                 object may appear, which makes it difficult to meet
                 these demands. Hence, it is possible to meet such
                 constraints by reducing the search space involved in
                 object detection. To this end, this article proposes a
                 depth and edge accelerated search method and a
                 dedicated hardware architecture that implements it to
                 provide an efficient platform for generic real-time
                 object detection. The hardware integration of depth and
                 edge processing mechanisms, with a support vector
                 machine classification core onto an FPGA platform,
                 results in significant speed-ups and improved detection
                 accuracy. The proposed architecture was evaluated using
                 images of various sizes, with results indicating that
                 the proposed architecture is capable of achieving
                 real-time frame rates for a variety of image sizes (271
                 fps for 320 $ \times $ 240, 42 fps for 640 $ \times $
                 480, and 23 fps for 800 $ \times $ 600) compared to
                 existing works, while reducing the false-positive rate
                 by 52\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2013:ALC,
  author =       "Li-Pin Chang and Tung-Yang Chou and Li-Chun Huang",
  title =        "An adaptive, low-cost wear-leveling algorithm for
                 multichannel solid-state disks",
  journal =      j-TECS,
  volume =       "13",
  number =       "3",
  pages =        "55:1--55:??",
  month =        dec,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2539036.2539051",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Dec 18 19:07:39 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multilevel flash memory cells double or even triple
                 storage density, producing affordable solid-state disks
                 for end users. As flash memory endures only limited
                 program-erase cycles, solid-state disks employ
                 wear-leveling methods to prevent any portions of flash
                 memory from being retired prematurely. Modern
                 solid-state disks must consider wear evenness at both
                 block and channel levels. This study first presents a
                 block-level wear-leveling method whose design has two
                 new ideas. First, the proposed method reuses the
                 intelligence available in flash-translation layers so
                 it does not require any new data structures. Second, it
                 adaptively tunes the threshold of block-level wear
                 leveling according to the runtime write pattern. This
                 study further introduces a new channel-level
                 wear-leveling strategy, because block-level wear
                 leveling is confined to a channel, but realistic
                 workloads do not evenly write all channels. The
                 proposed method swaps logical blocks among channels for
                 achieving an eventually-even state of channel
                 lifetimes. A series of trace-driven simulations show
                 that our wear-leveling method outperforms existing
                 approaches in terms of wear evenness and overhead
                 reduction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2014:EES,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Embedded systems --- more than
                 methodology",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "99:1--99:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2587894",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "99",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Daneshtalab:2014:ESI,
  author =       "Masoud Daneshtalab and Maurizio Palesi and Juha
                 Plosila",
  title =        "Editorial: Special issue on design challenges for
                 many-core processors",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "100:1--100:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2567941",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "100",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Koohi:2014:TSL,
  author =       "Somayyeh Koohi and Yawei Yin and Shaahin Hessabi and
                 S. J. Ben Yoo",
  title =        "Towards a scalable, low-power all-optical architecture
                 for networks-on-chip",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "101:1--101:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2567930",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article proposes a scalable wavelength-routed
                 optical Network on Chip (NoC) based on the Spidergon
                 topology, named Power-efficient Scalable
                 Wavelength-routed Network-on-chip (PeSWaN). The key
                 idea of the proposed all-optical architecture is the
                 utilization of per-receiver wavelengths in the data
                 network to prevent network contention and the adoption
                 of per-sender wavelengths in the control network to
                 avoid end-point contention. By performing a series of
                 simulations, we study the efficiency of the proposed
                 architecture, its power and energy consumption, and the
                 data transmission delay. Moreover, we compare the
                 proposed architecture with electrical NoCs and
                 alternative ONoC architectures under various traffic
                 patterns.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "101",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lhuillier:2014:HHA,
  author =       "Yves Lhuillier and Maroun Ojail and Alexandre Guerre
                 and Jean-Marc Philippe and Karim {Ben Chehida} and Farhat
                 Thabet and Caaliph Andriamisaina and Chafic Jaber and
                 Rapha{\"e}l David",
  title =        "{HARS}: a hardware-assisted runtime software for
                 embedded many-core architectures",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "102:1--102:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2517311",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The current trend in embedded computing consists in
                 increasing the number of processing resources on a
                 chip. Following this paradigm, cluster-based many-core
                 accelerators with a shared hierarchical memory have
                 emerged. Handling synchronizations on these
                 architectures is critical since parallel
                 implementations speed-ups of embedded applications
                 strongly depend on the ability to exploit the largest
                 possible number of cores while limiting task management
                 overhead. This article presents the combination of a
                 low-overhead complete runtime software and a flexible
                 hardware accelerator for synchronizations called HARS
                 (Hardware-Assisted Runtime Software). Experiments on a
                 multicore test chip showed that the hardware
                 accelerator for synchronizations has less than 1\% area
                 overhead compared to a cluster of the chip while
                 reducing synchronization latencies (up to 2.8 times
                 compared to a test-and-set implementation) and
                 contentions. The runtime software part offers basic
                 features like memory management but also optimized
                 execution engines to allow the easy and efficient
                 extraction of the parallelism in applications with
                 multiple programming models. By using the hardware
                 acceleration as well as a very low overhead task
                 scheduling software technique, we show that HARS
                 outperforms an optimized state-of-the-art task
                 scheduler by 13\% for the execution of a parallel
                 application.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "102",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yang:2014:CTR,
  author =       "Qiang Yang and Jian Fu and Raphael Poss and Chris
                 Jesshope",
  title =        "On-chip traffic regulation to reduce coherence
                 protocol cost on a microthreaded many-core architecture
                 with distributed caches",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "103:1--103:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2567931",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "When hardware cache coherence scales to many cores on
                 chip, over saturated traffic of the shared memory
                 system may offset the benefit from massive hardware
                 concurrency. In this article, we investigate the cost
                 of a write-update protocol in terms of on-chip memory
                 network traffic and its adverse effects on the system
                 performance based on a multithreaded many-core
                 architecture with distributed caches. We discuss
                 possible software and hardware solutions to alleviate
                 the network pressure. We find that in the context of
                 massive concurrency, by introducing a write-merging
                 buffer with 0.46\% area overhead to each core,
                 applications with good locality and concurrency are
                 boosted up by 18.74\% in performance on average. Other
                 applications also benefit from this addition and even
                 achieve a throughput increase of 5.93\%. In addition,
                 this improvement indicates that higher levels of
                 concurrency per core can be exploited without impacting
                 performance, thus tolerating latency better and giving
                 higher processor efficiencies compared to other
                 solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "103",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Parikh:2014:FCF,
  author =       "Ritesh Parikh and Valeria Bertacco",
  title =        "{ForEVeR}: a complementary formal and runtime
                 verification approach to correct {NoC} functionality",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "104:1--104:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2514871",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As silicon technology scales, modern processor and
                 embedded systems are rapidly shifting towards complex
                 chip multi-processor (CMP) and system-on-chip (SoC)
                 designs. As a side effect of complexity of these
                 designs, ensuring their correctness has become
                 increasingly problematic. Within these domains,
                 Network-on-Chips (NoCs) are a de-facto choice to
                 implement on-chip interconnect; their design is quickly
                 becoming extremely complex in order to keep up with
                 communication performance demands. As a result, design
                 errors in the NoC may go undetected and escape into the
                 final silicon. In this work, we propose ForEVeR, a
                 solution that complements the use of formal methods and
                 runtime verification to ensure functional correctness
                 in NoCs. Formal verification, due to its scalability
                 limitations, is used to verify smaller modules, such as
                 individual router components. To deliver correctness
                 guarantees for the complete network, we propose a
                 network-level detection and recovery solution that
                 monitors the traffic in the NoC and protects it against
                 escaped functional bugs. To this end, ForEVeR augments
                 the baseline NoC with a lightweight checker network
                 that alerts destination nodes of incoming packets ahead
                 of time. If a bug is detected, flagged by missed packet
                 arrivals, our recovery mechanism delivers the in-flight
                 data safely to the intended destination via the checker
                 network. ForEVeR's experimental evaluation shows that
                 it can recover from NoC design errors at only 4.9\%
                 area cost for an $ 8 \times 8 $ mesh interconnect, over
                 a time interval ranging from 0.5K to 30K cycles per
                 recovery event, and it incurs no performance overhead
                 in the absence of errors. ForEVeR can also protect NoC
                 operations against soft-errors: a growing concern with
                 the scaling of silicon. ForEVeR leverages the same
                 monitoring hardware to detect soft-error
                 manifestations, in addition to design-errors. Recovery
                 of the soft-error affected packets is guaranteed by
                 building resiliency features into our checker network.
                 ForEVeR incurs minimal performance penalty up to a flit
                 error rate of 0.01\% in lightly loaded networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "104",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{DelBarrio:2014:ULP,
  author =       "Alberto A. {Del Barrio} and Nader Bagherzadeh and
                 Rom{\'a}n Hermida",
  title =        "Ultra-low-power adder stage design for exascale
                 floating point units",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "105:1--105:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2567932",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Currently, the most powerful supercomputers can
                 provide tens of petaflops. Future many-core systems are
                 estimated to provide an exaflop. However, the power
                 budget limitation makes these machines still infeasible
                 and unaffordable. Floating Point Units (FPUs) are
                 critical from both the power consumption and
                 performance points of view of today's microprocessors
                 and supercomputers. Literature offers very different
                 designs. Some of them are focused on increasing
                 performance no matter the penalty, and others on
                 decreasing power at the expense of lower performance.
                 In this article, we propose a novel approach for
                 reducing the power of the FPU without degrading the
                 rest of parameters. Concretely, this power reduction is
                 also accompanied by an area reduction and a performance
                 improvement. Hence, an overall energy gain will be
                 produced. According to our experiments, our proposed
                 unit consumes 17.5\%, 23\% and 16.5\% less energy for
                 single, double and quadruple precision, with an
                 additional 15\%, 21.5\% and 14.5\% delay reduction,
                 respectively. Furthermore, area is also diminished by
                 4\%, 4.5\% and 5\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "105",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2014:YES,
  author =       "Yu-Jen Huang and Jin-Fu Li",
  title =        "Yield-enhancement schemes for multicore processor and
                 memory stacked {$3$D ICs}",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "106:1--106:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2567933",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A three-dimensional (3D) integrated circuit (IC) with
                 multiple dies vertically connected by
                 through-silicon-via (TSV) offers many benefits over
                 current 2D ICs. Multicore logic-memory die stacking has
                 been considered as one candidate for 3D ICs by
                 utilizing the TSV to provide high data bandwidth
                 between logic and memory. However, 3D ICs suffer from
                 the low-yield issue. This article proposes effective
                 yield-enhancement techniques for multicore die-stacked
                 3D ICs. Two reconfiguration schemes are proposed to
                 logically swap the positions of cores in the dies of 3D
                 ICs such that the yield of 3D ICs is increased. Two
                 algorithms also are proposed to determine the
                 reconfiguration effectively. Simulation results show
                 that the proposed reconfiguration schemes can achieve a
                 yield gain ranging from 1\% to 11\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "106",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Arnold:2014:TPH,
  author =       "Oliver Arnold and Emil Matus and Benedikt Noethen and
                 Markus Winter and Torsten Limberg and Gerhard
                 Fettweis",
  title =        "{Tomahawk}: Parallelism and heterogeneity in
                 communications signal processing {MPSoCs}",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "107:1--107:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2517087",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Heterogeneity and parallelism in MPSoCs for 4G (and
                 beyond) communications signal processing are inevitable
                 in order to meet stringent power constraints and
                 performance requirements. The question arises on how to
                 cope with the problem of system programmability and
                 runtime management incurred by the statically or even
                 dynamically varying number and type of processing
                 elements. This work addresses this challenge by
                 proposing the concept of a heterogeneous many-core
                 platform called Tomahawk. Apart from the definition of
                 the system architecture, in this approach a unified
                 framework including a model of computation, a
                 programming interface and a dedicated runtime
                 management unit called CoreManager is proposed. The
                 increase of system complexity in terms of application
                 parallelism and number of resources may lead to a
                 dramatic increase of the management costs, hence
                 causing performance degradation. For this reason, the
                 efficient implementation of the CoreManager becomes a
                 major issue in system design. This work compares the
                 performance and capabilities of various CoreManager
                 HW/SW solutions, based on ASIC, RISC and ASIP
                 paradigms. The results demonstrate that the proposed
                 ASIP-based solution approaches the performance of the
                 ASIC realization, while preserving the full flexibility
                 of the software (RISC-based) implementation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "107",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jin:2014:PPA,
  author =       "Yuho Jin and Timothy Mark Pinkston",
  title =        "{PAIS}: Parallelism-aware interconnect scheduling in
                 multicores",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "108:1--108:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2567934",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multicore processors have the potential to deliver
                 scalable performance by distributing computation across
                 multiple cores. However, the communication cost of
                 parallel application thread execution may significantly
                 limit the performance achievable due to latency and
                 contention on shared resources in the on-chip network
                 of multicores experienced by packets from critical
                 threads. We present PAIS, Parallelism-Aware
                 Interconnect Scheduling, that bolsters performance and
                 energy efficiency of parallel applications. PAIS
                 dynamically detects thread execution progress based on
                 communication latency and scheduling, and it
                 accelerates communication for slowly executing threads
                 by prioritizing packets from those threads with flow
                 control and priority-based arbitration.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "108",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Casu:2014:UMI,
  author =       "Mario R. Casu and Francesco Colonna and Marco Crepaldi
                 and Danilo Demarchi and Mariagrazia Graziano and
                 Maurizio Zamboni",
  title =        "{UWB} microwave imaging for breast cancer detection:
                 Many-core, {GPU}, or {FPGA?}",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "109:1--109:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2530534",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "An UWB microwave imaging system for breast cancer
                 detection consists of antennas, transceivers, and a
                 high-performance embedded system for elaborating the
                 received signals and reconstructing breast images. In
                 this article we focus on this embedded system. To
                 accelerate the image reconstruction, the Beamforming
                 phase has to be implemented in a parallel fashion. We
                 assess its implementation in three currently available
                 high-end platforms based on a multicore CPU, a GPU, and
                 an FPGA, respectively. We then project the results
                 applying technology scaling rules to future many-core
                 CPUs, many-thread GPUs, and advanced FPGAs. We consider
                 an optimistic case in which available resources
                 increase according to Moore's law only, and a
                 pessimistic case in which only a fraction of those
                 resources are available due to a limited power budget.
                 In both scenarios, an implementation that includes a
                 high-end FPGA outperforms the other alternatives. Since
                 the number of effectively usable cores in future
                 many-cores will be power-limited, and there is a trend
                 toward the integration of power-efficient accelerators,
                 we conjecture that a chip consisting of a many-core
                 section and a reconfigurable logic section will be the
                 perfect platform for this application.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "109",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Palesi:2014:ESS,
  author = {Maurizio Palesi and Todor Stefanov},
  title = {Editorial: Special Section on {ESTIMedia'13}},
  journal = j-TECS,
  year = {2014},
  month = mar,
  volume = {13},
  number = {3s},
  pages = {110:1--110:??},
  articleno = {110},
  DOI = {https://doi.org/10.1145/2567942},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Chen:2014:EOR,
  author =       "Gang Chen and Kai Huang and Alois Knoll",
  title =        "Energy optimization for real-time multiprocessor
                 system-on-chip with optimal {DVFS} and {DPM}
                 combination",
  journal =      j-TECS,
  volume =       "13",
  number =       "3s",
  pages =        "111:1--111:??",
  month =        mar,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2567935",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 24 17:17:02 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy optimization is a critical design concern for
                 embedded systems. Combining DVFS+DPM is considered as
                 one preferable technique to reduce energy consumption.
                 There have been optimal DVFS+DPM algorithms for
                 periodic independent tasks running on uniprocessor in
                 the literature. Optimal combination of DVFS and DPM for
                 periodic dependent tasks on multicore systems is
                 however not yet reported. The challenge of this problem
                 is that the idle intervals of cores are not easy to
                 model. In this article, a novel technique is proposed
                 to directly model the idle intervals of individual
                 cores such that both DVFS and DPM can be optimized at
                 the same time. Based on this technique, the energy
                 optimization problem is formulated by means of mixed
                 integrated linear programming. We also present
                 techniques to prune the exploration space of the
                 formulation. Experimental results using real-world
                 benchmarks demonstrate the effectiveness of our
                 approach compared to existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Abdel-Khalek:2014:PSP,
  author = {Rawan Abdel-Khalek and Valeria Bertacco},
  title = {Post-silicon platform for the functional diagnosis and
    debug of networks-on-chip},
  journal = j-TECS,
  year = {2014},
  month = mar,
  volume = {13},
  number = {3s},
  pages = {112:1--112:??},
  articleno = {112},
  DOI = {https://doi.org/10.1145/2567936},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {The increasing number of units in today's
    systems-on-chip and multicore processors has led to
    complex intra-chip communication solutions.
    Specifically, Networks-on-Chip (NoCs) have emerged as a
    favorable fabric to provide high bandwidth and low
    latency in connecting many units in a same chip. To
    achieve these goals, the NoC often includes complex
    components and advanced features, leading to the
    development of large and highly complex interconnect
    subsystems. One of the biggest challenges in these
    designs is to ensure the correct functionality of this
    communication infrastructure. To support this goal, an
    increasing fraction of the validation effort has
    shifted to post-silicon validation, because it permits
    exercising network activities that are too complex to
    be validated in pre-silicon. However, post-silicon
    validation is hindered by the lack of observability of
    the network's internal operations and thus, diagnosing
    functional errors during this phase is very difficult.
    In this work, we propose a post-silicon validation
    platform that improves observability of network
    operations by taking periodic snapshots of the traffic
    traversing the network. Each node's local cache is
    configured to temporarily store the snapshot logs in a
    designated area reserved for post-silicon validation
    and relinquished after product release. Each snapshot
    log is analyzed locally by a software algorithm running
    on its corresponding core, in order to detect
    functional errors. Upon error detection, all snapshot
    logs are aggregated at a central location to extract
    additional debug data, including an overview of network
    traffic surrounding the error event, as well as a
    partial reconstruction of the routes followed by
    packets in flight at the time. In our experiments, we
    found that this approach allows us to detect several
    types of functional errors, as well as observe, on
    average, over 50\% of the network's traffic and
    reconstruct at least half of each of their routes
    through the network.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Dasari:2014:NCA,
  author = {Dakshina Dasari and Borislav Nikoli{\'c} and Vincent
    N{\'e}lis and Stefan M. Petters},
  title = {{NoC} contention analysis using a branch-and-prune
    algorithm},
  journal = j-TECS,
  year = {2014},
  month = mar,
  volume = {13},
  number = {3s},
  pages = {113:1--113:??},
  articleno = {113},
  DOI = {https://doi.org/10.1145/2567937},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {``Many-core'' systems based on a Network-on-Chip (NoC)
    architecture offer various opportunities in terms of
    performance and computing capabilities, but at the same
    time they pose many challenges for the deployment of
    real-time systems, which must fulfill specific timing
    requirements at runtime. It is therefore essential to
    identify, at design time, the parameters that have an
    impact on the execution time of the tasks deployed on
    these systems and the upper bounds on the other key
    parameters. The focus of this work is to determine an
    upper bound on the traversal time of a packet when it
    is transmitted over the NoC infrastructure. Towards
    this aim, we first identify and explore some
    limitations in the existing recursive-calculus-based
    approaches to compute the Worst-Case Traversal Time
    (WCTT) of a packet. Then, we extend the existing model
    by integrating the characteristics of the tasks that
    generate the packets. For this extended model, we
    propose an algorithm called ``Branch and Prune'' (BP).
    Our proposed method provides tighter and safe estimates
    than the existing recursive-calculus-based approaches.
    Finally, we introduce a more general approach, namely
    ``Branch, Prune and Collapse'' (BPC) which offers a
    configurable parameter that provides a flexible
    trade-off between the computational complexity and the
    tightness of the computed estimate. The
    recursive-calculus methods and BP present two special
    cases of BPC when a trade-off parameter is $1$ or $
    \infty $, respectively. Through simulations, we analyze
    this trade-off, reason about the implications of
    certain choices, and also provide some case studies to
    observe the impact of task parameters on the WCTT
    estimates.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Lashgar:2014:HHI,
  author = {Ahmad Lashgar and Ahmad Khonsari and Amirali
    Baniasadi},
  title = {{HARP}: {Harnessing inActive thReads in many-core
    Processors}},
  journal = j-TECS,
  year = {2014},
  month = mar,
  volume = {13},
  number = {3s},
  pages = {114:1--114:??},
  articleno = {114},
  DOI = {https://doi.org/10.1145/2567938},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {SIMT accelerators are equipped with thousands of
    computational resources. Conventional accelerators,
    however, fail to fully utilize available resources due
    to branch and memory divergences. This underutilization
    is manifested in two underlying inefficiencies:
    pipeline width underutilization and pipeline depth
    underutilization. Width underutilization occurs when
    SIMD execution units are not entirely utilized due to
    branch divergences. This affects lane activity and
    results in SIMD inefficiency. Depth underutilization
    takes place when the pipeline runs out of active
    threads and is forced to leave pipeline stages idle.
    This work addresses both inefficiencies by harnessing
    inactive threads available to the pipeline. We
    introduce Harnessing inActive thReads in many-core
    Processors (or simply HARP) to improve width and depth
    utilization in accelerators. We show how using inactive
    yet ready threads can enhance performance. Moreover, we
    investigate implementation details and study
    microarchitectural changes needed to build a
    HARP-enhanced accelerator. Furthermore, we evaluate
    HARP under a variety of microarchitectural design
    points. We measure the area overhead associated with
    HARP and compare to conventional alternatives. Under
    Fermi-like GPUs, we show that HARP provides 10\%
    speedup on average (maximum of 1.6X) at the cost of
    3.5\% area overhead. Our analysis shows that HARP
    performs better under narrower SIMD and shorter
    pipelines.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Banaiyanmofrad:2014:NBF,
  author = {Abbas Banaiyanmofrad and Gustavo Gir{\~a}o and Nikil
    Dutt},
  title = {{NoC}-based fault-tolerant cache design in chip
    multiprocessors},
  journal = j-TECS,
  year = {2014},
  month = mar,
  volume = {13},
  number = {3s},
  pages = {115:1--115:??},
  articleno = {115},
  DOI = {https://doi.org/10.1145/2567939},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {Advances in technology scaling increasingly make
    emerging Chip MultiProcessor (CMP) platforms more
    susceptible to failures that cause various reliability
    challenges. In such platforms, error-prone on-chip
    memories (caches) continue to dominate the chip area.
    Also, Network-on-Chip (NoC) fabrics are increasingly
    used to manage the scalability of these architectures.
    We present a novel solution for efficient
    implementation of fault-tolerant design of Last-Level
    Cache (LLC) in CMP architectures. The proposed approach
    leverages the interconnection network fabric to protect
    the LLC cache banks against permanent faults in an
    efficient and scalable way. During an LLC access to a
    faulty block, the network detects and corrects the
    faults, returning the fault-free data to the requesting
    core. Leveraging the NoC interconnection fabric,
    designers can implement any cache fault-tolerant scheme
    in an efficient, modular, and scalable manner for
    emerging multicore/manycore platforms. We propose four
    different policies for implementing a remapping-based
    fault-tolerant scheme leveraging the NoC fabric in
    different settings. The proposed policies enable design
    trade-offs between NoC traffic (packets sent through
    the network) and the intrinsic parallelism of these
    communication mechanisms, allowing designers to tune
    the system based on design constraints. We perform an
    extensive design space exploration on NoC benchmarks to
    demonstrate the usability and efficacy of our approach.
    In addition, we perform sensitivity analysis to observe
    the behavior of various policies in reaction to
    improvements in the NoC architecture. The overheads of
    leveraging the NoC fabric are minimal: on an 8-core,
    16-cache-bank CMP we demonstrate reliable access to
    LLCs with additional overheads of less than 3\% in area
    and less than 7\% in power.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Bahirat:2014:MHP,
  author = {Shirish Bahirat and Sudeep Pasricha},
  title = {{METEOR}: Hybrid photonic ring-mesh network-on-chip
    for multicore architectures},
  journal = j-TECS,
  year = {2014},
  month = mar,
  volume = {13},
  number = {3s},
  pages = {116:1--116:??},
  articleno = {116},
  DOI = {https://doi.org/10.1145/2567940},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {With increasing application complexity and
    improvements in process technology, Chip
    MultiProcessors (CMPs) with tens to hundreds of cores
    on a chip are becoming a reality. Networks-on-Chip
    (NoCs) have emerged as scalable communication fabrics
    that can support high bandwidths for these massively
    parallel multicore systems. However, traditional
    electrical NoC implementations still need to overcome
    the challenges of high data transfer latencies and
    large power consumption. On-chip photonic interconnects
    with high performance-per-watt characteristics have
    recently been proposed as an alternative to address
    these challenges for intra-chip communication. In this
    article, we explore using low-cost photonic
    interconnects on a chip to enhance traditional
    electrical NoCs. Our proposed hybrid photonic ring-mesh
    NoC (METEOR) utilizes a configurable photonic ring
    waveguide coupled to a traditional 2D electrical mesh
    NoC. Experimental results indicate a strong motivation
    to consider the proposed architecture for future CMPs,
    as it can provide about 5$ \times $ reduction in power
    consumption and improved throughput and access
    latencies, compared to traditional electrical 2D mesh
    and torus NoC architectures. Compared to other
    previously proposed hybrid photonic NoC fabrics such as
    the hybrid photonic torus, Corona, and Firefly, our
    proposed fabric is also shown to have lower photonic
    area overhead, power consumption, and energy-delay
    product, while maintaining competitive throughput and
    latency.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Foglia:2014:ERI,
  author = {Pierfrancesco Foglia and Marco Solinas},
  title = {Exploiting replication to improve performances of
    {NUCA-based} {CMP} systems},
  journal = j-TECS,
  year = {2014},
  month = mar,
  volume = {13},
  number = {3s},
  pages = {117:1--117:??},
  articleno = {117},
  DOI = {https://doi.org/10.1145/2566568},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {Improvements in semiconductor nanotechnology made chip
    multiprocessors the reference architecture for
    high-performance microprocessors. CMPs usually adopt
    large Last-Level Caches (LLC) shared among cores and
    private L1 caches, whose performances depend on the
    wire-delay dominated response time of LLC. NUCA
    (NonUniform Cache Architecture) caches represent a
    viable solution for tolerating wire-delay effects. In
    this article, we present Re-NUCA, a NUCA cache that
    exploits replication of blocks inside the LLC to avoid
    performance limitations of D-NUCA caches due to
    conflicting access to shared data. Results show that a
    Re-NUCA LLC permits to improve performances of more
    than 5\% on average, and up to 15\% for applications
    that strongly suffer from conflicting access to shared
    data, while reducing network traffic and power
    consumption with respect to D-NUCA caches. Besides, it
    outperforms different S-NUCA schemes optimized with
    victim replication.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Mon Mar 24 17:17:02 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Shukla:2014:EEE,
  author = {Sandeep K. Shukla},
  title = {Editorial: Embedded everywhere for everyone},
  journal = j-TECS,
  year = {2014},
  month = feb,
  volume = {13},
  number = {4},
  pages = {74:1--74:??},
  articleno = {74},
  DOI = {https://doi.org/10.1145/2559122},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Lam:2014:REC,
  author = {Siew-Kei Lam and Thambipillai Srikanthan and
    Christopher T. Clarke},
  title = {Rapid evaluation of custom instruction selection
    approaches with {FPGA} estimation},
  journal = j-TECS,
  year = {2014},
  month = feb,
  volume = {13},
  number = {4},
  pages = {75:1--75:??},
  articleno = {75},
  DOI = {https://doi.org/10.1145/2560014},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {The main aim of this article is to demonstrate that a
    fast and accurate FPGA estimation engine is
    indispensable in design flows for custom instruction
    (template) selection. The need for a FPGA estimation
    engine stems from the difficulty in predicting the FPGA
    performance measures of selected custom instructions.
    We will present a FPGA estimation technique that
    partitions the high-level representation of custom
    instructions into clusters based on the structural
    organization of the target FPGA, while taking into
    account general logic synthesis principles adopted by
    FPGA tools. In this work, we have evaluated a widely
    used graph covering algorithm with various heuristics
    for custom instruction selection. In addition, we
    present an algorithm called Refined Largest Fit First
    (RLFF) that relies on a graph covering heuristic to
    select non-overlapping superset templates, which
    typically incorporate frequently used basic templates.
    The initial solution is further refined by considering
    overlapping templates that were ignored previously to
    see if their introduction could lead to higher
    performance. While RLFF provides the most efficient
    cover compared to the ILP method and other graph
    covering heuristics, FPGA estimation results reveals
    that RLFF leads to the worst performance in certain
    applications. It is therefore a worthy proposition to
    equip design flows with accurate FPGA estimation in
    order to rapidly determine the most profitable custom
    instruction approach for a given application.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Maggio:2014:TSC,
  author = {Martina Maggio and Federico Terraneo and Alberto
    Leva},
  title = {Task scheduling: a control-theoretical viewpoint for a
    general and flexible solution},
  journal = j-TECS,
  year = {2014},
  month = feb,
  volume = {13},
  number = {4},
  pages = {76:1--76:??},
  articleno = {76},
  DOI = {https://doi.org/10.1145/2560015},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {This article presents a new approach to the design of
    task scheduling algorithms, where system-theoretical
    methodologies are used throughout. The proposal implies
    a significant perspective shift with respect to
    mainstream design practices, but yields large payoffs
    in terms of simplicity, flexibility, solution
    uniformity for different problems, and possibility to
    formally assess the results also in the presence of
    unpredictable run-time situations. A complete
    implementation example is illustrated, together with
    various comparative tests, and a methodological
    treatise of the matter.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Dong:2014:EEE,
  author = {Wei Dong and Yunhao Liu and Chun Chen and Lin Gu and
    Xiaofan Wu},
  title = {{Elon}: Enabling efficient and long-term reprogramming
    for wireless sensor networks},
  journal = j-TECS,
  year = {2014},
  month = feb,
  volume = {13},
  number = {4},
  pages = {77:1--77:??},
  articleno = {77},
  DOI = {https://doi.org/10.1145/2560017},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {We present a new mechanism called Elon for enabling
    efficient and long-term reprogramming in wireless
    sensor networks. Elon reduces the transferred code size
    significantly by introducing the concept of replaceable
    component. It avoids the cost of hardware reboot with a
    novel software reboot mechanism. Moreover, it
    significantly prolongs the reprogrammable lifetime
    (i.e., the time period during which the sensor nodes
    can be reprogrammed) by avoiding flash writes for
    TelosB nodes. Experimental results show that Elon
    transfers up to 120--389 times less information than
    Deluge, and 18--42 times less information than Stream.
    The software reboot mechanism that Elon applies reduces
    the rebooting cost by 50.4\%--53.87\% in terms of
    beacon packets, and 56.83\% in terms of unsynchronized
    nodes. In addition, Elon prolongs the reprogrammable
    lifetime by a factor of 3.3.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Li:2014:BAM,
  author = {Shuai Li and Yuesheng Lou and Bo Liu},
  title = {{Bluetooth} aided mobile phone localization: a
    nonlinear neural circuit approach},
  journal = j-TECS,
  year = {2014},
  month = feb,
  volume = {13},
  number = {4},
  pages = {78:1--78:??},
  articleno = {78},
  DOI = {https://doi.org/10.1145/2560018},
  CODEN = {????},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  abstract = {It is meaningful to design a strategy to roughly
    localize mobile phones without a GPS by exploiting
    existing conditions and devices especially in
    environments without GPS availability (e.g., tunnels,
    subway stations, etc.). The availability of Bluetooth
    devices for most phones and the existence of a number
    of GPS equipped phones in a crowd of phone users enable
    us to design a Bluetooth aided mobile phone
    localization strategy. With the position of GPS
    equipped phones as beacons, and with the Bluetooth
    connection between neighbor phones as proximity
    constraints, we formulate the problem into an
    inequality problem defined on the Bluetooth network. A
    recurrent neural network is developed to solve the
    problem distributively in real time. The convergence of
    the neural network and the solution feasibility to the
    defined problem are both theoretically proven. The
    hardware implementation architecture of the proposed
    neural network is also given in this article. As
    applications, rough localizations of drivers in a
    tunnel and localization of customers in a supermarket
    are explored and simulated. Simulations demonstrate the
    effectiveness of the proposed method.},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hu:2014:MON,
  author =       {Jingtong Hu and Qingfeng Zhuge and Chun Jason Xue and
                 Wei-Che Tseng and Edwin H.-M. Sha},
  title =        {Management and optimization for nonvolatile
                 memory-based hybrid scratchpad memory on multicore
                 embedded processors},
  journal =      j-TECS,
  volume =       {13},
  number =       {4},
  pages =        {79:1--79:??},
  month =        feb,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2560019},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Mar 11 18:33:06 MDT 2014},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {The recent emergence of various Non-Volatile Memories
                 (NVMs), with many attractive characteristics such as
                 low leakage power and high-density, provides us with a
                 new way of addressing the memory power consumption
                 problem. In this article, we target embedded CMPs, and
                 propose a novel Hybrid Scratch Pad Memory (HSPM)
                 architecture which consists of SRAM and NVM to take
                 advantage of the ultra-low leakage power, high density
                 of NVM, and fast access of SRAM. A novel data
                 allocation algorithm as well as an algorithm to
                 determine the NVM/SRAM ratio for the novel HSPM
                 architecture are proposed. The experimental results
                 show that the data allocation algorithm can reduce the
                 memory access time by 33.51\% and the dynamic energy
                 consumption by 16.81\% on average for the HSPM
                 architecture when compared with a greedy algorithm. The
                 NVM/SRAM size determination algorithm can further
                 reduce the memory access time by 14.7\% and energy
                 consumption by 20.1\% on average.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {79},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Kim:2014:MBM,
  author =       {Heeseok Kim and Dong-Guk Han and Seokhie Hong and
                 Jaecheol Ha},
  title =        {Message blinding method requiring no multiplicative
                 inversion for {RSA}},
  journal =      j-TECS,
  volume =       {13},
  number =       {4},
  pages =        {80:1--80:??},
  month =        feb,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2560020},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Mar 11 18:33:06 MDT 2014},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {This article proposes a new message blinding methods
                 requiring no multiplicative inversion for RSA. Most
                 existing message blinding methods for RSA additionally
                 require the multiplicative inversion, even though
                 computational complexity of this operation is $ O(n^3)
                 $ which is equal to that of the exponentiation. Thus,
                 this additional operation is known to be the main
                 drawback of the existing message blinding methods for
                 RSA. In addition to requiring no additional
                 multiplicative inversion, our new countermeasure
                 provides the security against various power analysis
                 attacks as well as general differential power
                 analysis.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {80},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Mahdavikhah:2014:MFP,
  author =       {Behzad Mahdavikhah and Ramin Mafi and Shahin
                 Sirouspour and Nicola Nicolici},
  title =        {A multiple-{FPGA} parallel computing architecture for
                 real-time simulation of soft-object deformation},
  journal =      j-TECS,
  volume =       {13},
  number =       {4},
  pages =        {81:1--81:??},
  month =        feb,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2560031},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Mar 11 18:33:06 MDT 2014},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Hardware-based parallel computing is proposed for
                 acceleration of finite-element (FE) analysis of linear
                 elastic deformation models. An implementation of the
                 Preconditioned Conjugate Gradient algorithm on N Field
                 Programmable Gate Array (FPGA) devices solves the large
                 linear system of equations arising from the FE
                 discretization. The system employs a large number of
                 customized fixed-point computing units with a
                 high-throughput memory architecture. An implementation
                 of this scalable architecture on four Altera EP3SE110
                 FPGA devices yields a peak performance of 604 Giga
                 Operations per second. This enables haptic simulation
                 of a 3-dimensional deformable object of 21000 elements
                 at an update rate of 400Hz.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {81},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Axer:2014:BTP,
  author =       "Philip Axer and Rolf Ernst and Heiko Falk and Alain
                 Girault and Daniel Grund and Nan Guan and Bengt Jonsson
                 and Peter Marwedel and Jan Reineke and Christine
                 Rochange and Maurice Sebastian and Reinhard von
                 Hanxleden and Reinhard Wilhelm and Wang Yi",
  title =        "Building timing predictable embedded systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "82:1--82:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2560033",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 11 18:33:06 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A large class of embedded systems is distinguished
                 from general-purpose computing systems by the need to
                 satisfy strict requirements on timing, often under
                 constraints on available resources. Predictable system
                 design is concerned with the challenge of building
                 systems for which timing requirements can be guaranteed
                 a priori. Perhaps paradoxically, this problem has
                 become more difficult by the introduction of
                 performance-enhancing architectural elements, such as
                 caches, pipelines, and multithreading, which introduce
                 a large degree of uncertainty and make guarantees
                 harder to provide. The intention of this article is to
                 summarize the current state of the art in research
                 concerning how to build predictable yet performant
                 systems. We suggest precise definitions for the concept
                 of ``predictability'', and present predictability
                 concerns at different abstraction levels in embedded
                 system design. First, we consider timing predictability
                 of processor instruction sets. Thereafter, we consider
                 how programming languages can be equipped with
                 predictable timing semantics, covering both a
                 language-based approach using the synchronous
                 programming paradigm, as well as an environment that
                 provides timing semantics for a mainstream programming
                 language (in this case C). We present techniques for
                 achieving timing predictability on multicores. Finally,
                 we discuss how to handle predictability at the level of
                 networked embedded systems where randomly occurring
                 errors must be considered.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "82",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bathen:2014:ERC,
  author =       {Luis Angel D. Bathen and Nikil D. Dutt},
  title =        {Embedded {RAIDs}-on-chip for bus-based
                 chip-multiprocessors},
  journal =      j-TECS,
  volume =       {13},
  number =       {4},
  pages =        {83:1--83:??},
  month =        feb,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2533316},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Mar 11 18:33:06 MDT 2014},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {The dual effects of larger die sizes and technology
                 scaling, combined with aggressive voltage scaling for
                 power reduction, increase the error rates for on-chip
                 memories. Traditional on-chip memory reliability
                 techniques (e.g., ECC) incur significant power and
                 performance overheads. In this article, we propose a
                 low-power-and-performance-overhead Embedded RAID
                 (E-RAID) strategy and present Embedded RAIDs-on-Chip
                 (E-RoC), a distributed dynamically managed reliable
                 memory subsystem for bus-based Chip-Multiprocessors.
                 E-RoC achieves reliability through redundancy by
                 optimizing RAID-like policies tuned for on-chip
                 distributed memories. We achieve on-chip reliability of
                 memories through the use of Distributed Dynamic
                 ScratchPad Allocatable Memories (DSPAMs) and their
                 allocation policies. We exploit aggressive voltage
                 scaling to reduce power consumption overheads due to
                 parallel DSPAM accesses, and rely on the E-RoC Manager
                 to automatically handle any resulting
                 voltage-scaling-induced errors. We demonstrate how
                 E-RAIDs can further enhance the fault tolerance of
                 traditional memory reliability approaches by designing
                 E-RAID levels that exploit ECC. Finally, we show the
                 power and flexibility of the E-RoC concept by showing
                 the benefits of having a heterogeneous E-RAID levels
                 that fit each application's needs (fault tolerance,
                 power/energy, performance). Our experimental results on
                 CHStone/Mediabench II benchmarks show that our E-RAID
                 levels converge to 100\% error-free data rates much
                 faster than traditional ECC approaches. Moreover,
                 E-RAID levels that exploit ECC can guarantee 99.9\%
                 error-free data rates at ultra low Vdd on average,
                 where as traditional ECC approaches were able to attain
                 at most 99.1\% error-free data rates. We observe an
                 average of 22\% dynamic power consumption increase by
                 using traditional ECC approaches with respect to the
                 baseline (non-voltage scaled SPMs), whereas our E-RAID
                 levels are able to save dynamic power consumption by an
                 average of 27\% (w.r.t. the same non-voltage scaled
                 SPMs baseline), while incurring worst-case 2\% higher
                 performance overheads than traditional ECC approaches.
                 By voltage scaling the memories, we see that
                 traditional ECC approaches are able to save static
                 energy by 6.4\% (average), where as our E-RAID
                 approaches achieve 23.4\% static energy savings
                 (average). Finally, we observe that mixing E-RAID
                 levels allows us to further reduce the dynamic power
                 consumption by up to 55.5\% at the cost of an average
                 5.6\% increase in execution time over traditional
                 approaches.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {83},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Logaras:2014:PAE,
  author =       {Evangelos Logaras and Orsalia G. Hazapis and Elias S.
                 Manolakos},
  title =        {{Python} to accelerate embedded {SoC} design: a case
                 study for systems biology},
  journal =      j-TECS,
  volume =       {13},
  number =       {4},
  pages =        {84:1--84:??},
  month =        feb,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2560032},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Mar 11 18:33:06 MDT 2014},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {We present SysPy (System Python) a tool which exploits
                 the strengths of the popular Python scripting language
                 to boost design productivity of embedded System on
                 Chips for FPGAs. SysPy acts as a ``glue'' software
                 between mature HDLs, ready-to-use VHDL components and
                 programmable processor soft IP cores. SysPy can be used
                 to: (i) automatically translate hardware components
                 described in Python into synthesizable VHDL, (ii)
                 capture top-level structural descriptions of
                 processor-centric SoCs in Python, (iii) implement all
                 the steps necessary to compile the user's C code for an
                 instruction set processor core and generate processor
                 specific Tcl scripts that import to the design project
                 all the necessary HDL files of the processor's
                 description and instantiate/connect the core to other
                 blocks in a synthesizable top-level Python description.
                 Moreover, we have developed a Hardware Abstraction
                 Layer (HAL) in Python which allows user applications
                 running in a host PC to utilize effortlessly the SoC's
                 resources in the FPGA. SysPy's design capabilities,
                 when complemented with the developed HAL software API,
                 provide all the necessary tools for hw/sw partitioning
                 and iterative design for efficient SoC's performance
                 tuning. We demonstrate how SysPy's design flow and
                 functionalities can be used by building a
                 processor-centric embedded SoC for computational
                 systems biology. The designed SoC, implemented using a
                 Xilinx Virtex-5 FPGA, combines the flexibility of a
                 programmable soft processor core (Leon3) with the high
                 performance of an application specific core to simulate
                 flexibly and efficiently the stochastic behavior of
                 large size biomolecular reaction networks. Such
                 networks are essential for studying the dynamics of
                 complex biological systems consisting of multiple
                 interacting pathways.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {84},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Rodrigues:2014:LPI,
  author =       {Rance Rodrigues and Arunachalam Annamalai and Sandip
                 Kundu},
  title =        {A low-power instruction replay mechanism for design of
                 resilient microprocessors},
  journal =      j-TECS,
  volume =       {13},
  number =       {4},
  pages =        {85:1--85:??},
  month =        feb,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2560034},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Mar 11 18:33:06 MDT 2014},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {There is a growing concern about the increasing rate
                 of defects in computing substrates. Traditional
                 redundancy solutions prove to be too expensive for
                 commodity microprocessor systems. Modern
                 microprocessors feature multiple execution units to
                 take advantage of instruction level parallelism.
                 However, most workloads do not exhibit the level of
                 instruction level parallelism that a typical
                 microprocessor is resourced for. This offers an
                 opportunity to reexecute instructions using idle
                 execution units. But, relying solely on idle resources
                 will not provide full instruction coverage and there is
                 a need to explore other alternatives. To that end, we
                 propose and evaluate two instruction replay schemes
                 within the same core for online testing of the
                 execution units. One scheme (RER) reexecutes only the
                 retired instructions, while the other (REI) reexecutes
                 all the issued instructions. The complete proposed
                 solution requires a comparator and minor modifications
                 to control logic, resulting in negligible hardware
                 overhead. Both soft and hard error detection are
                 considered and the performance and energy impact of
                 both schemes are evaluated and compared against
                 previously proposed redundant execution schemes.
                 Results show that even though the proposed schemes
                 result in a small performance penalty when compared to
                 previous work, the energy overhead is significantly
                 reduced.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {85},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Tavana:2014:SHT,
  author =       "Mohammad Khavari Tavana and Nasibeh Teimouri and
                 Meisam Abdollahi and Maziar Goudarzi",
  title =        "Simultaneous hardware and time redundancy with online
                 task scheduling for low energy highly reliable
                 standby-sparing system",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "86:1--86:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2560035",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 11 18:33:06 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Standby-sparing is one of the common techniques in
                 order to design fault-tolerant safety-critical systems
                 where the high level of reliability is needed.
                 Recently, the minimization of energy consumption in
                 embedded systems has attracted a lot of concerns.
                 Simultaneous considering of high reliability and low
                 energy consumption by DVS is a challenging problem in
                 designing such a system, since using DVS has been shown
                 to reduce the reliability profoundly. In this article,
                 we have studied different schemes of standby-sparing
                 systems from the energy consumption and reliability
                 point of view. Moreover, we propose a new
                 standby-sparing scheme which addresses both reliability
                 and energy consumption jointly together. This scheme
                 uses a simple energy management coupled with an online
                 task scheduler which tries to dispatch those ready
                 tasks which are expected to lead to high reliability
                 and low energy consumption in the system. The
                 effectiveness of the proposed scheme has been shown on
                 TGFF under stochastic workloads. The results show 52\%
                 improvement on energy saving compared to the
                 conventional hot standby-sparing system. Moreover, two
                 orders of magnitude higher reliability is obtained on
                 average, while preserving the same level of energy
                 saving as compared to the state-of-the-art low-energy
                 standby-sparing system (LESS).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "86",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Riemens:2014:TSA,
  author =       "Danny P. Riemens and Georgi N. Gaydadjiev and Chris I.
                 de Zeeuw and Christos Strydis",
  title =        "Towards scalable arithmetic units with graceful
                 degradation",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "87:1--87:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2499367",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 11 18:33:06 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a new family of scalable
                 arithmetic units (ScAUs) targeting
                 resource-constrained, embedded devices. We, first,
                 study the performance, power, area and scalability
                 properties of general adders. Next, suitable
                 error-detection schemes for low-power embedded systems
                 are discussed. As a result, our ScAUs are enhanced with
                 a suitable error-detection scheme, resulting in a
                 Parity-Checked ScAU (PCScAU) design. The PCScAU strikes
                 a flexible trade-off between space and time redundancy,
                 offering dependability similar to high-end techniques
                 for the area and power cost of low-end approaches. An
                 alternative design, the Precision-Scalable Arithmetic
                 Unit (PScAU) maintains throughput with degraded
                 precision in case of hardware failures. The PScAU is
                 targeting dependable applications where latency rather
                 than numerical accuracy is more important. The PScAU's
                 downscaled mode is also interesting for runtime thermal
                 management due to its advantageous power consumption.
                 We implemented and synthesized the PCScAU, PScAU and a
                 few important reference designs (double-, triple- and
                 quadruple-modular-redundancy adders with/without input
                 gating) in 90-nm UMC technology. Overall, the PCScAU
                 ranks first in 9 out of 10 power-delay-area
                 (PDA)-product variants. It exhibits 16\% area savings
                 and 12\% performance speedup for 7\% increase in total
                 power consumption, compared to the cheapest form of
                 conventional hardware replication with the same fault
                 coverage. The PDA product of the PCScAU is, thus,
                 reduced by 21\%. It is interesting that, while total
                 power slightly increases, the PCScAU static power in
                 fact decreases by 14\%. Therefore, for newer technology
                 nodes where the static power component is significant,
                 the PCScAU can also achieve --- next to performance and
                 area --- significant power improvements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "87",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Park:2014:AWL,
  author =       {Sung Kyu Park and Min Kyu Maeng and Ki-Woong Park and
                 Kyu Ho Park},
  title =        {Adaptive wear-leveling algorithm for {PRAM} main
                 memory with a {DRAM} buffer},
  journal =      j-TECS,
  volume =       {13},
  number =       {4},
  pages =        {88:1--88:??},
  month =        feb,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2558427},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Mar 11 18:33:06 MDT 2014},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Phase Change RAM (PRAM) is a candidate to replace DRAM
                 main memory due to its low idle power consumption and
                 high scalability. However, its latency and endurance
                 have generated problems in fulfilling its main memory
                 role. The latency can be treated with a DRAM buffer,
                 but the endurance problem remains, with three critical
                 points that need to be improved despite the use of,
                 existing wear-leveling algorithms. First, existing DRAM
                 buffering schemes do not consider write count
                 distribution. Second, swapping and shifting operations
                 are performed statically. Finally, swapping and
                 shifting operations are loosely coupled with a DRAM
                 buffer. As a remedy to these drawbacks, we propose an
                 adaptive wear-leveling algorithm that consists of three
                 novel schemes for PRAM main memory with a DRAM buffer.
                 The PRAM-aware DRAM buffering scheme reduces the write
                 count and prevents skewed writing by considering the
                 write count and clean data based on the least recently
                 used (LRU) scheme. The adaptive multiple swapping and
                 shifting scheme makes the write count even with the
                 dynamic operation timing, the number of swapping pages
                 being based on the workload pattern. Our DRAM
                 buffer-aware swapping and shifting scheme reduces
                 overhead by curbing additional swapping and shifting
                 operations, thus reducing unnecessary write operations.
                 To evaluate the wear-leveling effect, we have
                 implemented a PIN-based wear-leveling simulator. The
                 evaluation confirms that the PRAM lifetime increases
                 from 0.68 years with the previous wear-leveling
                 algorithm to 5.32 years with the adaptive wear-leveling
                 algorithm.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {88},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Anjum:2014:TTA,
  author =       {Omer Anjum and Mubashir Ali and Teemu Pitk{\"a}nen and
                 Jari Nurmi},
  title =        {Transport triggered architecture to perform carrier
                 synchronization for {LTE}},
  journal =      j-TECS,
  volume =       {13},
  number =       {4},
  pages =        {89:1--89:??},
  month =        feb,
  year =         {2014},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/2560036},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Tue Mar 11 18:33:06 MDT 2014},
  bibsource =    {http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {In this article implementation of carrier frequency
                 offset estimate for 20MHz LTE baseband processing is
                 discussed. LTE (Long Term Evolution) is a wireless
                 communication standard that makes use of some
                 innovative techniques to gain very high data rates
                 ({$>$100Mbps}). This goal for such a high throughput
                 also imposes design challenges for the industry and
                 academia such as in the case of handheld mobile devices
                 where the power budget is very limited. Implicitly high
                 throughput means we need more computation power and
                 more energy. On the other hand industry is also
                 struggling for a flexible hardware solution, or
                 software defined a radio (SDR), to amortize the huge
                 cost of required hardware changes as the wireless
                 standards have kept evolving. Design innovations are
                 now needed to confront those challenges of low power
                 and flexible design without changing the hardware. The
                 implementation is made on Transport Triggered
                 Architecture (TTA), which is a unique concept in
                 computer architecture design, based on the single
                 instruction, ``MOVE''. The power consumption of the
                 architecture when synthesized on 180nm technology at
                 180MHz and 1.8V is 18.39mW. The total area occupied
                 excluding memory is 0.6mm$^2$. The proposed TTA
                 solution has been compared with, a more ASIC
                 (application specific integrated circuits), like ASIP
                 (application specific instruction processor) solution
                 and a coprocessor accelerator-based solution. The
                 proposed solution is more flexible: easily programmable
                 due to high level language support, easily scalable,
                 and still efficient in energy consumption needed to
                 complete the CFO (carrier frequency offset) estimation
                 task. Because of these attractive characteristics, TTA
                 is also a potential candidate for SDR platforms.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {89},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Clemente:2014:AMR,
  author = {Juan Antonio Clemente and Javier Resano and Daniel Mozos},
  title = {An approach to manage reconfigurations and reduce area cost in
    hard real-time reconfigurable systems},
  journal = j-TECS,
  volume = {13},
  number = {4},
  pages = {90:1--90:??},
  month = feb,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2560037},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article presents a methodology for building real-time
    reconfigurable systems that ensures that all the temporal constraints
    of a set of applications are met while optimizing the utilization of
    the available reconfigurable resources. Starting from a static platform
    that meets all the real-time deadlines, our approach takes advantage of
    runtime reconfiguration in order to reduce the area needed while
    guaranteeing that all the deadlines are still met. This goal is
    achieved by identifying which tasks must be always ready for execution
    in order to meet the deadlines and by means of a methodology that also
    allows reducing the area requirements.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {90},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Dewan:2014:BAF,
  author = {Farhana Dewan and Nathan Fisher},
  title = {Bandwidth allocation for fixed-priority-scheduled compositional
    real-time systems},
  journal = j-TECS,
  volume = {13},
  number = {4},
  pages = {91:1--91:??},
  month = feb,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2560038},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Recent research in compositional real-time systems has focused
    on determination of a component's real-time interface parameters. An
    important objective in interface-parameter determination is minimizing
    the bandwidth allocated to each component of the system while
    simultaneously guaranteeing component schedulability. With this goal in
    mind, in this article, we explore fixed-priority schedulability in
    compositional setting. First we derive an efficient exact test based on
    iterative convergence for sporadic task systems scheduled by
    fixed-priority (e.g., deadline monotonic, rate monotonic) upon an
    explicit-deadline periodic (EDP) resource. Then we address the time
    complexity of the exact test by developing a fully-polynomial-time
    approximation scheme (FPTAS) for allocating bandwidth to components.
    Our parametric algorithm takes the task system and an accuracy
    parameter $ \epsilon > 0 $ as input and returns a bandwidth which is
    guaranteed to be at most a factor $ (1 + \epsilon) $ times the optimal
    minimum bandwidth required to successfully schedule the task system. We
    perform thorough simulation over synthetically generated task systems
    to compare the performance of our proposed efficient-exact and the
    approximate algorithm and observe a significant decrease in runtime and
    a very small relative error when comparing the approximate algorithm
    with the exact algorithm and the sufficient algorithm.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {91},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Wu:2014:EIE,
  author = {I-Wei Wu and Jean Jyh-Jiun Shann and Wei-Chung Hsu and
    Chung-Ping Chung},
  title = {Extended Instruction Exploration for Multiple-Issue
    Architectures},
  journal = j-TECS,
  volume = {13},
  number = {4},
  pages = {92:1--92:??},
  month = feb,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2560039},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In order to satisfy the growing demand for high-performance
    computing in modern embedded devices, several architectural and
    microarchitectural enhancements have been implemented in processor
    architectures. Extended instruction (EI) is often used for
    architectural enhancement, while issuing multiple instructions is a
    common approach for microarchitectural enhancement. The impact of
    combining both of these approaches in the same design is not well
    understood. While previous studies have shown that EI can potentially
    improve performance in some applications on certain multiple-issue
    architectures, the algorithms used to identify EI for multiple-issue
    architectures yield only limited performance improvement. This is
    because not all arithmetic operations are suited for EI for
    multiple-issue architectures. To explore the full potential of EI for
    multiple-issue architectures, two important factors need to be
    considered: (1) the execution performance of an application is
    dominated by critical (located on the critical path) and highly
    resource-contentious (i.e., having a high probability of being delayed
    during execution due to hardware resource limitations) operations, and
    (2) an operation may become critical and/or highly resource contentious
    after some operations are added to the EI. This article presents an EI
    exploration algorithm for multiple-issue architectures that focuses on
    these two factors. Simulation results show that the proposed algorithm
    outperforms previously published algorithms.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {92},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Moussalli:2014:SPX,
  author = {Roger Moussalli and Mariam Salloum and Robert Halstead and
    Walid Najjar and Vassilis J. Tsotras},
  title = {A study on parallelizing {XML} path filtering using
    accelerators},
  journal = j-TECS,
  volume = {13},
  number = {4},
  pages = {93:1--93:??},
  month = feb,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2560040},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Publish-subscribe systems present the state of the art in
    information dissemination to multiple users. Such systems have evolved
    from simple topic-based to the current XML-based systems. XML-based
    pub-sub systems provide users with more flexibility by allowing the
    formulation of complex queries on the content as well as the structure
    of the streaming messages. Messages that match a given user query are
    forwarded to the user. This article examines how to exploit the
    parallelism found in XPath filtering. Using an incoming XML stream,
    parsing and matching thousands of user profiles are performed
    simultaneously by matching engines. We show the benefits and trade-offs
    of mapping the proposed filtering approach onto FPGAs, processing
    streams of XML at wire speed, and GPUs, providing the flexibility of
    software. This is in contrast to conventional approaches bound by the
    sequential aspect of software computing, associated with a large memory
    footprint. By converting XPath expressions into custom stacks, our
    solution is the first to provide support for complex XPath structural
    constructs, such as parent-child and ancestor descendant relations,
    whilst allowing wildcarding and recursion. The measured speedups
    resulting from the GPU and FPGA accelerations versus single-core CPUs
    are up to 6.6X and 2.5 orders of magnitude, respectively. The FPGA
    approaches are up to 31X faster than software running on 12 CPU
    cores.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {93},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Liu:2014:PRR,
  author = {Hengchang Liu and Pan Hui and Zhiheng Xie and Jingyuan Li and
    David Siu and Gang Zhou and Liusheng Huang and John A. Stankovic},
  title = {Providing reliable and real-time delivery in the presence of body
    shadowing in breadcrumb systems},
  journal = j-TECS,
  volume = {13},
  number = {4},
  pages = {94:1--94:??},
  month = feb,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2557633},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The primary goal of breadcrumb trail sensor networks is to
    transmit in real-time users' physiological parameters that measure
    life-critical functions to an incident commander through reliable
    multihop communication. In applications using breadcrumb solutions,
    there are often many users working together, and this creates a
    well-known body shadowing effect (BSE). In this article, we first
    measure the characteristics of body shadowing for 2.4GHz sensor nodes.
    Our empirical results show that the body shadowing effect leads to
    severe packet loss and consequently very poor real-time performance.
    Then we develop a novel Intentional Forwarding solution. This solution
    accurately detects the shadowing mode and enables selected neighbors to
    forward data packets. Experimental results from a fully implemented
    testbed demonstrate that Intentional Forwarding is able to improve the
    end-to-end average packet delivery ratio (PDR) from 58\% to 93\% and
    worst-case PDR from 45\% to 85\%, and is able to meet soft real-time
    requirements even under severe body shadowing problems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {94},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Gal:2014:GLC,
  author = {Bertrand {Le Gal} and Christophe Jego},
  title = {{GPU-like} on-chip system for decoding {LDPC} codes},
  journal = j-TECS,
  volume = {13},
  number = {4},
  pages = {95:1--95:??},
  month = feb,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2538668},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Rapid prototyping is an important step in the development and
    the verification of computationally demanding tasks of digital
    communication systems, such as Forward Error Correction (FEC) decoding.
    The goal is to replace time-consuming simulations based on abstract
    models of the system with real-time experiments under real-world
    conditions. GPU-like architecture is a promising approach to fully
    exploit the potential of FPGA-based acceleration platforms. In this
    article, an application-specific GPU-like architecture and a complete
    compilation framework for decoding LDPC codes are proposed. The
    interest in an application-specific GPU in comparison with current GPUs
    is detailed. Finally, real-time experimentations demonstrate the
    potential of the GPU-like decoder to investigate both algorithmic and
    architectural issues.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {95},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Khan:2014:OLT,
  author = {Umair Ali Khan and Bernhard Rinner},
  title = {Online learning of timeout policies for dynamic power
    management},
  journal = j-TECS,
  volume = {13},
  number = {4},
  pages = {96:1--96:??},
  month = feb,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2529992},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Dynamic power management (DPM) refers to strategies which
    selectively change the operational states of a device during runtime to
    reduce the power consumption based on the past usage pattern, the
    current workload, and the given performance constraint. The power
    management problem becomes more challenging when the workload exhibits
    nonstationary behavior which may degrade the performance of any single
    or static DPM policy. This article presents a reinforcement learning
    (RL)-based DPM technique for optimal selection of timeout values in the
    different device states. Each timeout period determines how long the
    device will remain in a particular state before the transition decision
    is taken. The timeout selection is based on workload estimates derived
    from a Multilayer Artificial Neural Network (ML-ANN) and an objective
    function given by weighted performance and power parameters. Our DPM
    approach is further able to adapt the power-performance weights online
    to meet user-specified power and performance constraints, respectively.
    We have completely implemented our DPM algorithm on our embedded
    traffic surveillance platform and performed long-term experiments using
    real traffic data to demonstrate the effectiveness of the DPM. Our
    results show that the proposed learning algorithm not only adequately
    explores the power-performance trade-off with nonstationary workload
    but can also successfully perform online adjustment of the trade-off
    parameter in order to meet the user-specified constraint.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {96},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Gong:2014:SBF,
  author = {Lingkan Gong and Oliver Diessel},
  title = {Simulation-based functional verification of dynamically
    reconfigurable systems},
  journal = j-TECS,
  volume = {13},
  number = {4},
  pages = {97:1--97:??},
  month = feb,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2560042},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Tue Mar 11 18:33:06 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Dynamically reconfigurable systems (DRS) implemented using
    field-programmable gate arrays (FPGAs) allow hardware logic to be
    partially reconfigured while the rest of the design continues to
    operate. By mapping multiple reconfigurable hardware modules to the
    same physical region of an FPGA, such systems are able to
    time-multiplex their modules at runtime and adapt themselves to
    changing execution requirements. This architectural flexibility
    introduces challenges for verifying system functionality. New
    simulation approaches are required to extend traditional simulation
    techniques to assist designers in testing and debugging the
    time-varying behavior of DRS. This article summarizes our previous work
    on ReSim, the first tool to allow cycle-accurate yet physically
    independent simulation of a DRS reconfiguring both its logic and state.
    Furthermore, ReSim-based simulation does not require changing the
    design for simulation purposes and thereby verifies the
    implementation-ready design instead of a variation of the design. We
    discuss the conflicting requirements of simulation accuracy and
    verification productivity in verifying DRS designs and describe our
    approach to resolve this challenge. Through a range of case studies, we
    demonstrate that ReSim assists designers in detecting
    fabric-independent bugs of DRS designs and helps to achieve
    verification closure of DRS design projects.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {97},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Guimbretiere:2014:ADP,
  author =       "Fran{\c{c}}ois Guimbreti{\`e}re and Shenwei Liu and
                 Han Wang and Rajit Manohar",
  title =        "An asymmetric dual-processor architecture for
                 low-power information appliances",
  journal =      j-TECS,
  volume =       "13",
  number =       "4",
  pages =        "98:1--98:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2560538",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 11 18:33:06 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As users become increasingly conscious of their energy
                 footprint---either to improve battery life or to respect
                 the environment---improved energy efficiency of systems
                 has gained in importance. This is especially important
                 in the context of information appliances such as e-book
                 readers that are meant to replace books, since their
                 energy efficiency impacts how long the appliance can be
                 used on a single charge of the battery. In this
                 article, we present a new software and hardware
                 architecture for information appliances that provides
                 significant advantages in terms of device lifetime. The
                 architecture combines a low-power microcontroller with
                 a high-performance application processor, where the
                 low-power microcontroller is used to handle simple user
                 interactions (e.g., turning pages, inking, entering
                 text) without waking up the main application processor.
                 We demonstrate how this architecture is easily adapted
                 to the traditional way of building user interfaces
                 using a user interface markup language. We report on
                 our initial measurements using an E Ink-based
                 prototype. When comparing our hybrid architecture to a
                 simpler solution we found that we can increase the
                 battery life by a factor of 1.72 for a reading task and
                 by a factor of 3.23 for a writing task. We conclude by
                 presenting design guidelines aimed at optimizing the
                 overall energy signature of information appliances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "98",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Anonymous:2014:AOS,
  author = {Anonymous},
  title = {Abstracts: Online Supplements Volume 13, Number 1s Volume 13,
    Number 2s Volume 13, Number 3s Volume 13, Number 4s Volume 13, Number
    5s},
  journal = j-TECS,
  volume = {13},
  number = {4},
  pages = {99:1--99:??},
  month = nov,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2688494.2688495},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Dec 5 18:52:55 MST 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {99},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Natale:2014:ESI,
  author = {Marco {Di Natale} and Rich West and Jian-Jia Chen and Rahul
    Mangharam},
  title = {Editorial: Special issue on real-time and embedded technology and
    applications},
  journal = j-TECS,
  volume = {13},
  number = {4s},
  pages = {119:1--119:??},
  month = apr,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2588608},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Apr 4 18:59:24 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {119},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Whitham:2014:ERC,
  author = {Jack Whitham and Neil C. Audsley and Robert I. Davis},
  title = {Explicit reservation of cache memory in a predictable, preemptive
    multitasking real-time system},
  journal = j-TECS,
  volume = {13},
  number = {4s},
  pages = {120:1--120:??},
  month = apr,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2523070},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Fri Apr 4 18:59:24 MDT 2014},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We describe and evaluate explicit reservation of cache memory
    to reduce the cache-related preemption delay (CRPD) observed when tasks
    share a cache in a preemptive multitasking hard real-time system. We
    demonstrate the approach using measurements obtained from a hardware
    prototype, and present schedulability analyses for systems that share a
    cache by explicit reservation. These analyses form the basis for a
    series of experiments to further evaluate the approach. We find that
    explicit reservation is most useful for larger task sets with high
    utilization. Some task sets cannot be scheduled with a conventional
    cache, but are schedulable with explicit reservation.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {120},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Nirjon:2014:MSR,
  author =       "Shahriar Nirjon and Angela Nicoara and Cheng-Hsin Hsu
                 and Jatinder Pal Singh and John A. Stankovic",
  title =        "{MultiNets}: a system for real-time switching between
                 multiple network interfaces on mobile devices",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "121:1--121:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2489788",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "MultiNets is a system supporting seamless switch-over
                 between wireless interfaces on mobile devices in
                 real-time. MultiNets is configurable to run in three
                 different modes: (i) Energy Saving mode---for choosing
                 the interface that saves the most energy based on the
                 condition of the device, (ii) Offload mode---for
                 offloading data traffic from the cellular to WiFi
                 network, and (iii) Performance mode---for selecting the
                 network for the fastest data connectivity. MultiNets
                 also provides a powerful API that gives the application
                 developers: (i) the choice to select a network
                 interface to communicate with a specific server, and
                 (ii) the ability to simultaneously transfer data over
                 multiple network interfaces. MultiNets is modular,
                 easily integrable, lightweight, and applicable to
                 various mobile operating systems. We implement
                 MultiNets on Android devices as a show case. MultiNets
                 does not require any extra support from the network
                 infrastructure and runs existing applications
                 transparently. To evaluate MultiNets, we first collect
                 data traces from 13 actual Android smartphone users
                 over three months. We then use the collected traces to
                 show that, by automatically switching to WiFi whenever
                 it is available, MultiNets can offload on average
                 79.82\% of the data traffic. We also illustrate that,
                 by optimally switching between the interfaces,
                 MultiNets can save on average 21.14 KJ of energy per
                 day, which is equivalent to 27.4\% of the daily energy
                 usage. Using our API, we demonstrate that a video
                 streaming application achieves 43--271\% higher
                 streaming rate when concurrently using WiFi and 3G
                 interfaces. We deploy MultiNets in a real-world
                 scenario and our experimental results show that
                 depending on the user requirements, it outperforms the
                 state-of-the-art Android system either by saving up to
                 33.75\% energy, achieving near-optimal offloading, or
                 achieving near-optimal throughput while substantially
                 reducing TCP interruptions due to switching.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "121",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kumar:2014:WCG,
  author          = {Pratyush Kumar and Lothar Thiele},
  title           = {Worst-case guarantees on a processor with
                     temperature-based feedback control of speed},
  journal         = j-TECS,
  volume          = {13},
  number          = {4s},
  pages           = {122:1--122:??},
  month           = apr,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2584611},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Apr 4 18:59:24 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {On-chip temperatures continue to rise, in spite of
                     design efforts towards more efficient cooling and novel
                     low-power technologies. Run-time thermal management
                     techniques, such as speed scaling and system
                     throttling, constitute a standard component in today's
                     processors. One such technique is the feedback control
                     of the processing speed based on the on-chip
                     temperature. If suitably designed, such a controller
                     can ensure that the temperature of the processor does
                     not exceed a given bound, independent of the
                     application. Such isolation of needs is encouraging.
                     However, from the application's stand-point, such a
                     processor must provide performance guarantees; in
                     particular, the guarantee that real-time jobs do not
                     have worst-case delays larger than their relative
                     deadlines. For applications which exhibit variability,
                     such as bursty arrival patterns, computing such
                     guarantees is not apparent. As key enablers in such a
                     computation, for the specific setting of
                     First-Come-First-Serve (FCFS) scheduling, we (a) define
                     and prove a monotonicity principle satisfied by the
                     processor with the said controller, and (b) propose a
                     thermally clipped processor model. We identify the
                     worst-case trace simulating which on a suitably chosen
                     thermally clipped processor provides the tight
                     upper-bound on the worst-case delay. These results hold
                     for general models of (a) the power consumption of the
                     processor, (b) its thermal model, (c) the speed scaling
                     law, and (d) the task model. For this modelling scope,
                     we show that the same worst-case trace also leads to
                     the worst-case temperature of the processor. This is
                     useful to characterise tasks which do not load the
                     processor sufficiently to hit the given peak
                     temperature bound. We demonstrate the utility of this
                     calculation by designing a shaper to delay the arrival
                     times of jobs and thereby restrict the observed
                     worst-case temperature while still meeting the task's
                     deadlines.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {122},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Guan:2014:WAM,
  author =       "Nan Guan and Mingsong Lv and Wang Yi and Ge Yu",
  title =        "{WCET} analysis with {MRU} cache: Challenging {LRU}
                 for predictability",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "123:1--123:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2584655",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most previous work on cache analysis for WCET
                 estimation assumes a particular replacement policy
                 called LRU. In contrast, much less work has been done
                 for non-LRU policies, since they are generally
                 considered to be very unpredictable. However, most
                 commercial processors are actually equipped with these
                 non-LRU policies, since they are more efficient in
                 terms of hardware cost, power consumption and thermal
                 output, while still maintaining almost as good
                 average-case performance as LRU. In this work, we study
                 the analysis of MRU, a non-LRU replacement policy
                 employed in mainstream processor architectures like
                 Intel Nehalem. Our work shows that the predictability
                 of MRU has been significantly underestimated before,
                 mainly because the existing cache analysis techniques
                 and metrics do not match MRU well. As our main
                 technical contribution, we propose a new cache hit/miss
                 classification, $k$-Miss, to better capture the MRU
                 behavior, and develop formal conditions and efficient
                 techniques to decide $k$-Miss memory accesses. A
                 remarkable feature of our analysis is that the $k$-Miss
                 classifications under MRU are derived by the analysis
                 result of the same program under LRU. Therefore, our
                 approach inherits the advantages in efficiency and
                 precision of the state-of-the-art LRU analysis
                 techniques based on abstract interpretation.
                 Experiments with instruction caches show that our
                 proposed MRU analysis has both good precision and high
                 efficiency, and the obtained estimated WCET is rather
                 close to (typically 1\% to 8\% more than) that obtained
                 by the state-of-the-art LRU analysis, which indicates
                 that MRU is also a good candidate for cache replacement
                 policies in real-time systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "123",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chattopadhyay:2014:UWA,
  author =       "Sudipta Chattopadhyay and Lee Kee Chong and Abhik
                 Roychoudhury and Timon Kelter and Peter Marwedel and
                 Heiko Falk",
  title =        "A unified {WCET} analysis framework for multicore
                 platforms",
  journal =      j-TECS,
  volume =       "13",
  number =       "4s",
  pages =        "124:1--124:??",
  month =        apr,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2584654",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 4 18:59:24 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the advent of multicore architectures, worst-case
                 execution time (WCET) analysis has become an
                 increasingly difficult problem. In this article, we
                 propose a unified WCET analysis framework for multicore
                 processors featuring both shared cache and shared bus.
                 Compared to other previous works, our work differs by
                 modeling the interaction of shared cache and shared bus
                 with other basic microarchitectural components (e.g.,
                 pipeline and branch predictor). In addition, our
                 framework does not assume a timing anomaly free
                 multicore architecture for computing the WCET. A
                 detailed experiment methodology suggests that we can
                 obtain reasonably tight WCET estimates in a wide range
                 of benchmark programs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "124",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhu:2014:CCL,
  author          = {Xiuming Zhu and Pei-Chi Huang and Jianyong Meng and
                     Song Han and Aloysius K. Mok and Deji Chen and Mark
                     Nixon},
  title           = {{ColLoc}: a collaborative location and tracking system
                     on {WirelessHART}},
  journal         = j-TECS,
  volume          = {13},
  number          = {4s},
  pages           = {125:1--125:??},
  month           = apr,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2584656},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Apr 4 18:59:24 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Localization in wireless sensor networks is an
                     important functionality that is required for tracking
                     personnel and assets in industrial environments,
                     especially for emergency response. Current commercial
                     localization systems such as GPS suffer from the
                     limitations of either high cost or low availability in
                     many situations (e.g., indoor environments that exclude
                     direct line-of-sight signal reception). The development
                     of industrial wireless sensor networks such as
                     WirelessHART provides an alternative. In this article,
                     we present the design and implementation of ColLoc: a
                     collaborative location and tracking system on
                     WirelessHART as an industrially viable solution. This
                     solution is built upon several technological advances.
                     First, ColLoc adds the roaming functionality to
                     WirelessHART and thus provides a means for keeping
                     mobile WirelessHART devices connected to the network.
                     Second, ColLoc employs a collaborative framework to
                     integrate different types of distance measurements into
                     the location estimation algorithm by weighing them
                     according to their precision levels. ColLoc adopts
                     several novel techniques to improve distance estimation
                     accuracy and decreases the RSSI presurvey cost. These
                     techniques include introducing distance error range
                     constraints to the measurements, judiciously selecting
                     the initial point in location estimation and online
                     updating the signal propagation models in the anchor
                     nodes, integrating Extended Kalman Filter (EKF) with
                     trilateration to track moving objects. Our
                     implementation of ColLoc can be applied to any
                     WirelessHART-conforming network because no modification
                     is needed on the WirelessHART field devices. We have
                     implemented a complete ColLoc system to validate both
                     the design and the effectiveness of our localization
                     algorithm. Our experiments show that the mobile device
                     never drops out of the WirelessHART network while
                     moving around; with the help of even one dependable
                     anchor, using RSSI can yield at least 75\% of distance
                     errors below 5 meters, which is quite acceptable for
                     many typical industrial automation applications.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {125},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Huang:2014:IEM,
  author          = {Huang-Ming Huang and Christopher Gill and Chenyang
                     Lu},
  title           = {Implementation and evaluation of mixed-criticality
                     scheduling approaches for sporadic tasks},
  journal         = j-TECS,
  volume          = {13},
  number          = {4s},
  pages           = {126:1--126:??},
  month           = apr,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2584612},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Apr 4 18:59:24 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  note            = {See corrections and comments
                     \cite{Fleming:2017:CDI}.},
  abstract        = {Traditional fixed-priority scheduling analysis for
                     periodic and sporadic task sets is based on the
                     assumption that all tasks are equally critical to the
                     correct operation of the system. Therefore, every task
                     has to be schedulable under the chosen scheduling
                     policy, and estimates of tasks' worst-case execution
                     times must be conservative in case a task runs longer
                     than is usual. To address the significant
                     underutilization of a system's resources under normal
                     operating conditions that can arise from these
                     assumptions, several mixed-criticality scheduling
                     approaches have been proposed. However, to date, there
                     have been few quantitative comparisons of system
                     schedulability or runtime overhead for the different
                     approaches. In this article, we present a side-by-side
                     implementation and evaluation of the known
                     mixed-criticality scheduling approaches, for periodic
                     and sporadic mixed-criticality tasks on uniprocessor
                     systems, under a mixed-criticality scheduling model
                     that is common to all these approaches. To make a fair
                     evaluation of mixed-criticality scheduling, we also
                     address previously open issues and propose
                     modifications to improve particular approaches. Our
                     empirical evaluations demonstrate that user-space
                     implementations of mechanisms to enforce different
                     mixed-criticality scheduling approaches can be achieved
                     atop Linux without kernel modification, with reasonably
                     low (but in some cases nontrivial) overhead for
                     mixed-criticality real-time task sets.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {126},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Pajic:2014:SCM,
  author          = {Miroslav Pajic and Zhihao Jiang and Insup Lee and Oleg
                     Sokolsky and Rahul Mangharam},
  title           = {Safety-critical medical device development using the
                     {UPP2SF} model translation tool},
  journal         = j-TECS,
  volume          = {13},
  number          = {4s},
  pages           = {127:1--127:??},
  month           = apr,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2584651},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Apr 4 18:59:24 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Software-based control of life-critical embedded
                     systems has become increasingly complex, and to a large
                     extent has come to determine the safety of the human
                     being. For example, implantable cardiac pacemakers have
                     over 80,000 lines of code which are responsible for
                     maintaining the heart within safe operating limits. As
                     firmware-related recalls accounted for over 41\% of the
                     600,000 devices recalled in the last decade, there is a
                     need for rigorous model-driven design tools to generate
                     verified code from verified software models. To this
                     effect, we have developed the UPP2SF model-translation
                     tool, which facilitates automatic conversion of
                     verified models (in UPPAAL) to models that may be
                     simulated and tested (in Simulink/Stateflow). We
                     describe the translation rules that ensure correct
                     model conversion, applicable to a large class of
                     models. We demonstrate how UPP2SF is used in the
                     model-driven design of a pacemaker whose model is (a)
                     designed and verified in UPPAAL (using timed automata),
                     (b) automatically translated to Stateflow for
                     simulation-based testing, and then (c) automatically
                     generated into modular code for hardware-level
                     integration testing of timing-related errors. In
                     addition, we show how UPP2SF may be used for worst-case
                     execution time estimation early in the design stage.
                     Using UPP2SF, we demonstrate the value of integrated
                     end-to-end modeling, verification, code-generation and
                     testing process for complex software-controlled
                     embedded systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {127},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Saifullah:2014:NOR,
  author          = {Abusayeed Saifullah and Chengjie Wu and Paras Babu
                     Tiwari and You Xu and Yong Fu and Chenyang Lu and Yixin
                     Chen},
  title           = {Near optimal rate selection for wireless control
                     systems},
  journal         = j-TECS,
  volume          = {13},
  number          = {4s},
  pages           = {128:1--128:??},
  month           = apr,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2584652},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Apr 4 18:59:24 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With the advent of industrial standards such as
                     WirelessHART, process industries are now gravitating
                     towards wireless control systems. Due to limited
                     bandwidth in a wireless network shared by multiple
                     control loops, it is critical to optimize the overall
                     control performance. In this article, we address the
                     scheduling-control co-design problem of determining the
                     optimal sampling rates of feedback control loops
                     sharing a WirelessHART network. The objective is to
                     minimize the overall control cost while ensuring that
                     all data flows meet their end-to-end deadlines. The
                     resulting constrained optimization based on existing
                     delay bounds for WirelessHART networks is challenging
                     since it is nondifferentiable, nonlinear, and not in
                     closed-form. We propose four methods to solve this
                     problem. First, we present a subgradient method for
                     rate selection. Second, we propose a greedy heuristic
                     that usually achieves low control cost while
                     significantly reducing the execution time. Third, we
                     propose a global constrained optimization algorithm
                     using a simulated annealing (SA) based penalty method.
                     We study SA method under both constant factor penalty
                     and adaptive penalty. Finally, we formulate rate
                     selection as a differentiable convex optimization
                     problem that provides a quick solution through a convex
                     optimization technique. This is based on a new delay
                     bound that is convex and differentiable, and hence
                     simplifies the optimization problem. We study both the
                     gradient descent method and the interior point method
                     to solve it. We evaluate all methods through
                     simulations based on topologies of a 74-node wireless
                     sensor network testbed. The subgradient method is
                     disposed to incur the longest execution time as well as
                     the highest control cost among all methods. Among the
                     SA-based constant penalty method, the greedy heuristic,
                     and the gradient descent method, the first two
                     represent the opposite ends of the tradeoff between
                     control cost and execution time, while the third one
                     hits the balance between the two. We further observe
                     that the SA based adaptive penalty method is superior
                     to the constant penalty method, and that the interior
                     point method is superior to the gradient method. Thus,
                     the interior point method and the SA-based adaptive
                     penalty method are the two most effective approaches
                     for rate selection. While both methods are competitive
                     against each other in terms of control cost, the
                     interior point method is significantly faster than the
                     penalty method. As a result, the interior point method
                     upon convex relaxation is more suitable for online rate
                     adaptation than the SA based adaptive penalty method
                     due to their significant difference in run-time
                     efficiency.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {128},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Hettiarachchi:2014:DAF,
  author          = {Pradeep M. Hettiarachchi and Nathan Fisher and Masud
                     Ahmed and Le Yi Wang and Shinan Wang and Weisong Shi},
  title           = {A Design and Analysis Framework for Thermal-Resilient
                     Hard Real-Time Systems},
  journal         = j-TECS,
  volume          = {13},
  number          = {5s},
  pages           = {146:1--146:??},
  month           = sep,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2632154},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Oct 6 16:07:59 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We address the challenge of designing predictable
                     real-time systems in an unpredictable thermal
                     environment where environmental temperature may
                     dynamically change (e.g., implantable medical devices).
                     Towards this challenge, we propose a control-theoretic
                     design methodology that permits a system designer to
                     specify a set of hard real-time performance modes under
                     which the system may operate. The system automatically
                     adjusts the real-time performance mode based on the
                     external thermal stress. We show (via analysis,
                     simulations, and a hardware testbed implementation)
                     that our control design framework is stable and control
                     performance is equivalent to previous real-time thermal
                     approaches, even under dynamic temperature changes. A
                     crucial and novel advantage of our framework over
                     previous real-time control is the ability to guarantee
                     hard deadlines even under transitions between modes.
                     Furthermore, our system design permits the calculation
                     of a new metric called thermal resiliency that
                     characterizes the maximum external thermal stress that
                     any hard real-time performance mode can withstand.
                     Thus, our design framework and analysis may be
                     classified as a thermal stress analysis for real-time
                     systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {146},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Chattopadhyay:2014:CRP,
  author          = {Sudipta Chattopadhyay and Abhik Roychoudhury},
  title           = {Cache-Related Preemption Delay Analysis for Multilevel
                     Noninclusive Caches},
  journal         = j-TECS,
  volume          = {13},
  number          = {5s},
  pages           = {147:1--147:??},
  month           = sep,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2632156},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Oct 6 16:07:59 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With the rapid growth of complex hardware features,
                     timing analysis has become an increasingly difficult
                     problem. The key to solving this problem lies in the
                     precise and scalable modeling of performance-enhancing
                     processor features (e.g., cache). Moreover, real-time
                     systems are often multitasking and use preemptive
                     scheduling, with fixed or dynamic priority assignment.
                     For such systems, cache related preemption delay (CRPD)
                     may increase the execution time of a task. Therefore,
                     CRPD may affect the overall schedulability analysis.
                     Existing works propose to bound the value of CRPD in a
                     single-level cache. In this article, we propose a CRPD
                     analysis framework that can be used for a two-level,
                     noninclusive cache hierarchy. In addition, our proposed
                     framework is also applicable in the presence of shared
                     caches. We first show that CRPD analysis faces several
                     new challenges in the presence of a multilevel,
                     noninclusive cache hierarchy. Our proposed framework
                     overcomes all such challenges and we can formally prove
                     the correctness of our framework. We have performed
                     experiments with several subject programs, including an
                     unmanned aerial vehicle (UAV) controller and an in-situ
                     space debris monitoring instrument. Our experimental
                     results suggest that we can provide sound and precise
                     CRPD estimates using our framework.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {147},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Paul:2014:RTP,
  author          = {Anand Paul},
  title           = {Real-Time Power Management for Embedded {M2M} Using
                     Intelligent Learning Methods},
  journal         = j-TECS,
  volume          = {13},
  number          = {5s},
  pages           = {148:1--148:??},
  month           = sep,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2632158},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Oct 6 16:07:59 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this work, an embedded system working model is
                     designed with one server that receives requests by a
                     requester by a service queue that is monitored by a
                     Power Manager (PM). A novel approach is presented based
                     on reinforcement learning to predict the best policy
                     amidst existing DPM policies and deterministic
                     Markovian nonstationary policies (DMNSP). We apply
                     reinforcement learning, namely a computational approach
                     to understanding and automating goal-directed learning
                     that supports different devices according to their DPM.
                     Reinforcement learning uses a formal framework defining
                     the interaction between agent and environment in terms
                     of states, response action, and reward points. The
                     capability of this approach is demonstrated by an
                     event-driven simulator designed using Java with a
                     power-manageable machine-to-machine device. Our
                     experiment result shows that the proposed dynamic power
                     management with timeout policy gives average power
                     saving from 4\% to 21\% and the novel dynamic power
                     management with DMNSP gives average power saving from
                     10\% to 28\% more than already proposed DPM policies.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {148},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Zeng:2014:MSC,
  author          = {Haibo Zeng and Marco {Di Natale} and Qi Zhu},
  title           = {Minimizing Stack and Communication Memory Usage in
                     Real-Time Embedded Applications},
  journal         = j-TECS,
  volume          = {13},
  number          = {5s},
  pages           = {149:1--149:??},
  month           = sep,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2632160},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Oct 6 16:07:59 MDT 2014},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In the development of real-time embedded applications,
                     especially those on systems-on-chip, an efficient use
                     of RAM memory is as important as the effective
                     scheduling of the computation resources. The protection
                     of communication and state variables accessed by
                     concurrent tasks must provide real-time schedulability
                     guarantees while using the least amount of memory.
                     Several schemes, including preemption thresholds, have
                     been developed to improve schedulability and save stack
                     space by selectively disabling preemption. However, the
                     design synthesis problem is still open. In this
                     article, we target the assignment of the scheduling
                     parameters to minimize memory usage for systems of
                     practical interest, including designs compliant with
                     automotive standards. We propose algorithms either
                     proven optimal or shown to improve on randomized
                     optimization methods like simulated annealing.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {149},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Chakraborty:2014:MCH,
  author =       "Arup Chakraborty and Houman Homayoun and Amin Khajeh
                 and Nikil Dutt and Ahmed Eltawil and Fadi Kurdahi",
  title =        "Multicopy Cache: a Highly Energy-Efficient Cache
                 Architecture",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "150:1--150:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2632162",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Caches are known to consume a large part of total
                 microprocessor energy. Traditionally, voltage scaling
                 has been used to reduce both dynamic and leakage power
                 in caches. However, aggressive voltage reduction causes
                 process-variation-induced failures in cache SRAM
                 arrays, thus compromising cache reliability. We present
                 MultiCopy Cache (MC$^2$), a new cache architecture that
                 achieves significant reduction in energy consumption
                 through aggressive voltage scaling while maintaining
                 high error resilience (reliability) by exploiting
                 multiple copies of each data item in the cache. Unlike
                 many previous approaches, MC$^2$ does not require any
                 error map characterization and therefore is responsive
                 to changing operating conditions (e.g., Vdd noise,
                 temperature, and leakage) of the cache. MC$^2$ also
                 incurs significantly lower overheads compared to other
                 ECC-based caches. Our experimental results on embedded
                 benchmarks demonstrate that MC$^2$ achieves up to 60\%
                 reduction in energy and energy-delay product (EDP) with
                 only 3.5\% reduction in IPC and no appreciable area
                 overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "150",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hanumaiah:2014:SST,
  author =       "Vinay Hanumaiah and Digant Desai and Benjamin Gaudette
                 and Carole-Jean Wu and Sarma Vrudhula",
  title =        "{STEAM}: a Smart Temperature and Energy Aware
                 Multicore Controller",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "151:1--151:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2661430",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent empirical studies have shown that multicore
                 scaling is fast becoming power limited, and
                 consequently, an increasing fraction of a multicore
                 processor has to be under clocked or powered off.
                 Therefore, in addition to fundamental innovations in
                 architecture, compilers and parallelization of
                 application programs, there is a need to develop
                 practical and effective dynamic energy management (DEM)
                 techniques for multicore processors. Existing DEM
                 techniques mainly target reducing processor power
                 consumption and temperature, and only few of them have
                 addressed improving energy efficiency for multicore
                 systems. With energy efficiency taking a center stage
                 in all aspects of computing, the focus of the DEM needs
                 to be on finding practical methods to maximize
                 processor efficiency. Towards this, this article
                 presents STEAM --- an optimal closed-loop DEM
                 controller designed for multicore processors. The
                 objective is to maximize energy efficiency by dynamic
                 voltage and frequency scaling (DVFS). Energy efficiency
                 is defined as the ratio of performance to power
                 consumption or performance-per-watt (PPW). This is the
                 same as the number of instructions executed per Joule.
                 The PPW metric is actually replaced by $ P^\alpha $ PW
                 (performance$^\alpha $-per-Watt), which allows for
                 controlling the importance of performance versus power
                 consumption by varying $ \alpha $. The proposed
                 controller was implemented on a Linux system and tested
                 with the Intel Sandy Bridge processor. There are three
                 power management schemes called governors, available
                 with Intel platforms. They are referred to as (1)
                 Powersave (lowest power consumption), (2) Performance
                 (achieves highest performance), and (3) Ondemand. Our
                 simple and lightweight controller when executing SPEC
                 CPU2006, PARSEC, and MiBench benchmarks have achieved
                 an average of 18\% improvement in energy efficiency
                 (MIPS/Watt) over these ACPI policies. Moreover, STEAM
                 also demonstrated an excellent prediction of core
                 temperatures and power consumption, and the ability to
                 control the core temperatures within $ 3^\circ $C of
                 the specified maximum. Finally, the overhead of the
                 STEAM implementation (in terms of CPU resources) is
                 less than 0.25\%. The entire implementation is
                 self-contained and can be installed on any processor
                 with very little prior knowledge of the processor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "151",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rossebo:2014:ISI,
  author =       "Judith E. Y. Rosseb{\o} and Siv Hilde Houmb and Geri
                 Georg and Virginia N. L. Franqueira and Dimitrios
                 Serpanos",
  title =        "Introduction to Special Issue on Risk and Trust in
                 Embedded Critical Systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "152:1--152:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2659008",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "152",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dunbar:2014:DTE,
  author =       "Carson Dunbar and Gang Qu",
  title =        "Designing Trusted Embedded Systems from Finite State
                 Machines",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "153:1--153:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2638555",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Sequential components are crucial for a real-time
                 embedded system as they control the system based on the
                 system's current state and real life input. In this
                 article, we explore the security and trust issues of
                 sequential system design from the perspective of a
                 finite state machine (FSM), which is the most popular
                 model used to describe sequential systems.
                 Specifically, we find that the traditional FSM
                 synthesis procedure will introduce security risks and
                 cannot guarantee trustworthiness in the implemented
                 circuits. Indeed, we show that not only do there exist
                 simple and effective ways to attack a sequential
                 system, it is also possible to insert a hardware Trojan
                 Horse into the design without introducing any
                 significant design overhead. We then formally define
                 the notion of trust in FSM and propose a novel approach
                 to designing trusted circuits from the FSM
                 specification. We demonstrate both our findings on the
                 security threats and the effectiveness of our proposed
                 method on Microelectronics Center of North Carolina
                 (MCNC) sequential circuit benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "153",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dua:2014:CSS,
  author =       "Akshay Dua and Nirupama Bulusu and Wu-Chang Feng and
                 Wen Hu",
  title =        "Combating Software and {Sybil} Attacks to Data
                 Integrity in Crowd-Sourced Embedded Systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "154:1--154:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629338",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Crowd-sourced mobile embedded systems allow people to
                 contribute sensor data, for critical applications,
                 including transportation, emergency response and
                 eHealth. Data integrity becomes imperative as malicious
                 participants can launch software and Sybil attacks
                 modifying the sensing platform and data. To address
                 these attacks, we develop (1) a Trusted Sensing
                 Peripheral (TSP) enabling collection of high-integrity
                 raw or aggregated data, and participation in
                 applications requiring additional modalities; and (2) a
                 Secure Tasking and Aggregation Protocol (STAP) enabling
                 aggregation of TSP trusted readings by untrusted
                 intermediaries, while efficiently detecting
                 fabricators. Evaluations demonstrate that TSP and STAP
                 are practical and energy-efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "154",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2014:ISI,
  author =       "Li-Pin Chang and Tei-Wei Kuo and Chris Gill and Jin
                 Nakazawa",
  title =        "Introduction to the Special Issue on Real-Time,
                 Embedded and Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "155:1--155:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2660488",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "155",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Qiu:2014:BPD,
  author =       "Keni Qiu and Mengying Zhao and Chun Jason Xue and Alex
                 Orailoglu",
  title =        "Branch Prediction-Directed Dynamic Instruction Cache
                 Locking for Embedded Systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "156:1--156:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2660492",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cache locking is a cache management technique to
                 preclude the replacement of locked cache contents.
                 Cache locking is often adopted to improve cache access
                 predictability in Worst-Case Execution Time (WCET)
                 analysis. Static cache locking methods have been
                 proposed recently to improve Average-Case Execution
                 Time (ACET) performance. This article presents an
                 approach, Branch Prediction-directed Dynamic Cache
                 Locking (BPDCL), to improve system performance through
                 cache conflict miss reduction. In the proposed
                 approach, the control flow graph of a program is first
                 partitioned into disjoint execution regions, then
                 memory blocks worth locking are determined by
                 calculating the locking profit for each region. These
                 two steps are conducted during compilation time. At
                 runtime, directed by branch predictions, locking
                 routines are prefetched into a small high-speed buffer.
                 The predetermined cache locking contents are loaded and
                 locked at specific execution points during program
                 execution. Experimental results show that the proposed
                 BPDCL method exhibits an average improvement of 25.9\%,
                 13.8\%, and 8.0\% on cache miss rate reduction in
                 comparison to cases with no cache locking, the static
                 locking method, and the dynamic locking method,
                 respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "156",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kang:2014:HSA,
  author =       "Chih-Kai Kang and Yu-Jhang Cai and Chin-Hsien Wu and
                 Pi-Cheng Hsiu",
  title =        "A Hybrid Storage Access Framework for High-Performance
                 Virtual Machines",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "157:1--157:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2660493",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "In recent years, advances in virtualization technology
                 have enabled multiple virtual machines to run on a
                 physical machine, such that each virtual machine can
                 perform independently with its own operating system.
                 The IT industry has adopted virtualization technology
                 because of its ability to improve hardware resource
                 utilization, achieve low-power consumption, support
                 concurrent applications, simplify device management,
                 and reduce maintenance costs. However, because of the
                 hardware limitation of storage devices, the I/O
                 capacity could cause performance bottlenecks. To
                 address the problem, we propose a hybrid storage access
                 framework that exploits solid-state drives (SSDs) to
                 improve the I/O performance in a virtualization
                 environment.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "157",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pagani:2014:EEA,
  author =       "Santiago Pagani and Jian-Jia Chen",
  title =        "Energy Efficiency Analysis for the Single Frequency
                 Approximation {(SFA)} Scheme",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "158:1--158:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2660490",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy-efficient designs are important issues in
                 computing systems. This article studies the energy
                 efficiency of a simple and linear-time strategy, called
                 the Single Frequency Approximation (SFA) scheme, for
                 periodic real-time tasks on multicore systems with a
                 shared supply voltage in a voltage island. The strategy
                 executes all the cores at a single frequency to just
                 meet the timing constraints. SFA has been adopted in
                 the literature after task partitioning, but the
                 worst-case performance of SFA in terms of energy
                 consumption incurred is an open problem. We provide
                 comprehensive analysis for SFA to derive the cycle
                 utilization distribution for its worst-case behaviour
                 for energy minimization. Our analysis shows that the
                 energy consumption incurred by using SFA for task
                 execution is at most 1.53 (1.74, 2.10, 2.69,
                 respectively), compared to the energy consumption of
                 the optimal voltage/frequency scaling, when the dynamic
                 power consumption is a cubic function of the frequency
                 and the voltage island has up to 4 (8, 16, 32,
                 respectively) cores. The analysis shows that SFA is
                 indeed an effective scheme under practical settings,
                 even though it is not optimal. Furthermore, since all
                 the cores run at a single frequency and no frequency
                 alignment for Dynamic Voltage and Frequency Scaling
                 (DVFS) between cores is needed, any unicore dynamic
                 power management technique for reducing the energy
                 consumption for idling can be easily incorporated
                 individually on each core in the voltage island. This
                 article also provides an analysis of energy consumption
                 for SFA combined with procrastination for Dynamic Power
                 Management (DPM), resulting in an increment of 1 from
                 the previous results for task execution. Furthermore,
                 we also extend our analysis for deriving the
                 approximation factor of SFA for a multicore system with
                 multiple voltage islands.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "158",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Raravi:2014:TAA,
  author =       "Gurulingesh Raravi and Vincent N{\'e}lis",
  title =        "Task Assignment Algorithms for Heterogeneous
                 Multiprocessors",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "159:1--159:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2660494",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Consider the problem of assigning implicit-deadline
                 sporadic tasks on a heterogeneous multiprocessor
                 platform comprising a constant number (denoted by $t$)
                 of distinct types of processors --- such a platform is
                 referred to as a $t$-type platform. We present two
                 algorithms, LPG$_{IM}$ and LPG$_{NM}$, each providing
                 the following guarantee. For a given $t$-type platform
                 and a task set, if there exists a task assignment such
                 that tasks can be scheduled to meet their deadlines by
                 allowing them to migrate only between processors of the
                 same type (intra-migrative), then: (i) LPG$_{IM}$
                 succeeds in finding such an assignment where the same
                 restriction on task migration applies (intra-migrative)
                 but given a platform in which only one processor of
                 each type is $ 1 + \alpha \times (t - 1) / t $ times
                 faster and (ii) LPG$_{NM}$ succeeds in finding a task
                 assignment where tasks are not allowed to migrate
                 between processors (non-migrative) but given a platform
                 in which every processor is $ 1 + \alpha $ times
                 faster. The parameter $ \alpha $ is a property of the
                 task set; it is the maximum of all the task
                 utilizations that are no greater than one. To the best
                 of our knowledge, for $t$-type heterogeneous
                 multiprocessors: (i) for the problem of intra-migrative
                 task assignment, no previous algorithm exists with a
                 proven bound and hence our algorithm, LPG$_{IM}$, is
                 the first of its kind and (ii) for the problem of
                 non-migrative task assignment, our algorithm,
                 LPG$_{NM}$, has superior performance compared to
                 state-of-the-art.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "159",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Andersson:2014:PGT,
  author =       "Bj{\"o}rn Andersson and Gurulingesh Raravi",
  title =        "Provably Good Task Assignment for Two-Type
                 Heterogeneous Multiprocessors Using Cutting Planes",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "160:1--160:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2660495",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Consider scheduling of real-time tasks on a
                 multiprocessor where migration is forbidden.
                 Specifically, consider the problem of determining a
                 task-to-processor assignment for a given collection of
                 implicit-deadline sporadic tasks upon a multiprocessor
                 platform in which there are two distinct types of
                 processors. For this problem, we propose a new
                 algorithm, LPC (task assignment based on solving a
                 Linear Program with Cutting planes). The algorithm
                 offers the following guarantee: for a given task set
                 and a platform, if there exists a feasible
                 task-to-processor assignment, then LPC succeeds in
                 finding such a feasible task-to-processor assignment as
                 well but on a platform in which each processor is $ 1.5
                 \times $ faster and has three additional processors.
                 For systems with a large number of processors, LPC has
                 a better approximation ratio than state-of-the-art
                 algorithms. To the best of our knowledge, this is the
                 first work that develops a provably good real-time task
                 assignment algorithm using cutting planes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "160",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mancuso:2014:OPA,
  author =       "Giulio M. Mancuso and Enrico Bini and Gabriele
                 Pannocchia",
  title =        "Optimal Priority Assignment to Control Tasks",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "161:1--161:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2660496",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In embedded real-time systems, task priorities are
                 often assigned to meet deadlines. However, in control
                 tasks, a late completion of a task has no catastrophic
                 consequence; rather, it has a quantifiable impact in
                 the control performance achieved by the task. In this
                 article, we address the problem of determining the
                 optimal assignment of priorities and periods of
                 sampled-data control tasks that run over a shared
                 computation unit. We show that the minimization of the
                 overall cost can be performed efficiently using a
                 branch and bound algorithm that can be further speeded
                 up by allowing for a small degree of suboptimality.
                 Detailed numerical simulations are presented to show
                 the advantages of various branching alternatives, the
                 overall algorithm effectiveness, and its scalability
                 with the number of tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "161",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{DeNiz:2014:UBR,
  author =       "Dionisio {De Niz} and Lutz Wrage and Anthony Rowe and
                 Ragunathan (Raj) Rajkumar",
  title =        "Utility-Based Resource Overbooking for Cyber-Physical
                 Systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "162:1--162:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2660497",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Traditional hard real-time scheduling algorithms
                 require the use of the worst-case execution times to
                 guarantee that deadlines will be met. Unfortunately,
                 many algorithms with parameters derived from sensing
                 the physical world suffer large variations in execution
                 time, leading to pessimistic overall utilization, such
                 as visual recognition tasks. In this article, we
                 present ZS-QRAM, a scheduling approach that enables the
                 use of flexible execution times and application-derived
                 utility to tasks in order to maximize total system
                 utility. In particular, we provide a detailed
                 description of the algorithm, the formal proofs for its
                 temporal protection, and a detailed evaluation. Our
                 evaluation uses the Utility Degradation Resilience
                 (UDR) showing that ZS-QRAM is able to obtain $ 4 \times
                 $ as much UDR as ZSRM, a previous overbooking approach,
                 and almost $ 2 \times $ as much UDR as Rate-Monotonic
                 with Period Transformation (RM/TP). We then evaluate a
                 Linux kernel module implementation of our scheduler on
                 an Unmanned Air Vehicle (UAV) platform. We show that,
                 by using our approach, we are able to keep the tasks
                 that render the most utility by degrading lower-utility
                 ones even in the presence of highly dynamic execution
                 times.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "162",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2014:STD,
  author =       "Kai Liu and Victor C. S. Lee and Joseph K. Y. Ng and
                 Sang H. Son and Edwin H.-M. Sha",
  title =        "Scheduling Temporal Data with Dynamic Snapshot
                 Consistency Requirement in Vehicular Cyber-Physical
                 Systems",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "163:1--163:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629546",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 6 16:07:59 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Timely and efficient data dissemination is one of the
                 fundamental requirements to enable innovative
                 applications in vehicular cyber-physical systems
                 (VCPS). In this work, we intensively analyze the
                 characteristics of temporal data dissemination in VCPS.
                 On this basis, we formulate the static and dynamic
                 snapshot consistency requirements on serving real-time
                 requests for temporal data items. Two online algorithms
                 are proposed to enhance the system performance with
                 different requirements. In particular, a reschedule
                 mechanism is developed to make the scheduling adaptable
                 to the dynamic snapshot consistency requirement. A
                 comprehensive performance evaluation demonstrates the
                 superiority of the proposed algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "163",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Goehringer:2014:ISI,
  author =       "Diana Goehringer",
  title =        "Introduction to the {Special Issue on Virtual
                 Prototyping of Parallel and Embedded Systems (ViPES)}",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "164:1--164:??",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2675739",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jan 7 15:03:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "164",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schumacher:2014:LLS,
  author =       "Christoph Schumacher and Jan Henrik Weinstock and
                 Rainer Leupers and Gerd Ascheid and Laura Tosoratto and
                 Alessandro Lonardo and Dietmar Petras and Andreas
                 Hoffmann",
  title =        "{legaSCi}: Legacy {SystemC} Model Integration into
                 Parallel Simulators",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "165:1--165:??",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2678018",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jan 7 15:03:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Architects and developers use virtual prototypes of
                 computer systems to receive early feedback on hardware
                 design decisions as well as to develop and debug system
                 software. This is facilitated by the comprehensive
                 inspection capabilities virtual prototypes offer. For
                 virtual prototypes, execution speed is crucial to
                 support the users' productivity. Parallel simulation
                 techniques are employed to offset the speed impact of
                 the increasing number of cores that need to be
                 simulated in virtual prototypes of parallel and
                 embedded systems. SystemC is the de facto industry
                 standard library for virtual platform modeling. Since
                 currently no parallel SystemC library is commonly
                 available, typical SystemC models are coded for
                 execution in sequential simulation environments. Simply
                 putting such models into parallel simulators may lead
                 to thread-safety issues and may additionally cause
                 nondeterministic simulator behavior. This article
                 proposes a methodology to support simulation creators
                 to face the challenge of integrating such legacy models
                 into parallel SystemC environments. The feasibility of
                 the proposed method is evaluated by parallelizing the
                 latest instance of the EU FP7 project EURETILE embedded
                 platform simulator. Using legaSCi, on four host
                 processor cores a speedup of 2.13$ \times $ is
                 demonstrated, without having to change the individual
                 models of the simulator.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "165",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Razaghi:2014:HCM,
  author =       "Parisa Razaghi and Andreas Gerstlauer",
  title =        "Host-Compiled Multicore System Simulation for Early
                 Real-Time Performance Evaluation",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "166:1--166:??",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2678020",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jan 7 15:03:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With increasing complexity and software content,
                 modern embedded platforms employ a heterogeneous mix of
                 multicore processors along with hardware accelerators
                 in order to provide high performance in limited power
                 budgets. To evaluate real-time performance and other
                 constraints, full system simulations are essential.
                 With traditional approaches being either slow or
                 inaccurate, so-called source-level or host-compiled
                 simulators have recently emerged as a solution for
                 rapid evaluation of the complete system at early design
                 stages. In such approaches, a faster simulation is
                 achieved by abstracting execution behavior and
                 increasing simulation granularity. However, existing
                 source-level simulators often focus on application
                 behavior only while neglecting the effects of
                 hardware/software interactions and their associated
                 speed and accuracy trade-offs. In this article, we
                 present a host-compiled simulator that emulates
                 software execution in a full-system context. Our
                 simulator incorporates abstract models of both
                 real-time operating systems (RTOSs) and multicore
                 processors to replicate timing-accurate
                 hardware/software interactions and to enable full
                 system cosimulation. An integrated approach for
                 automatic timing granularity adjustment (ATGA) uses
                 observations of the system state to automatically
                 control the timing model and optimally navigate speed
                 versus accuracy conditions. Results as applied to
                 industrial-strength platforms confirm that OS- and
                 system-level effects can significantly contribute to
                 overall accuracy and simulation overhead. By providing
                 careful abstractions, our models can achieve full
                 system simulations at equivalent speeds of more than a
                 thousand MIPS with less than 3\% timing error. Coupled
                 with the capability to easily adjust simulation
                 parameters and configurations, this demonstrates the
                 benefits of our simulator for early application
                 development and design space exploration.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "166",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mihajlovic:2014:DIQ,
  author =       "Bojan Mihajlovi{\'c} and Zeljko Zili{\'c} and Warren
                 J. Gross",
  title =        "Dynamically Instrumenting the {QEMU} Emulator for
                 {Linux} Process Trace Generation with the {GDB}
                 Debugger",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "167:1--167:??",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2678022",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jan 7 15:03:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/gnu.bib;
                 https://www.math.utah.edu/pub/tex/bib/linux.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/unix.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "In software debugging, trace generation techniques are
                 used to resolve highly complex bugs. However, the
                 emulators increasingly used for embedded software
                 development do not yet offer the types of trace
                 generation infrastructure available in hardware. In
                 this article, we make changes to the ARM ISA emulation
                 of the QEMU emulator to allow for continuous
                 instruction-level trace generation. Using a standard
                 GDB client, tracepoints can be inserted to dynamically
                 log registers and memory addresses without altering
                 executing code. The ability to run trace experiments in
                 five different modes allows the scope of trace
                 generation to be narrowed as needed, down to the level
                 of a single Linux process. Our scheme collects the
                 execution traces of a Linux process on average between
                 9.6x--0.7x the speed of existing QEMU trace
                 capabilities, with 96.7\% less trace data volume.
                 Compared to a software-instrumented tracing scheme, our
                 method is both unobtrusive and performs on average
                 between 3--4 orders of magnitude faster.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "167",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Diamantopoulos:2014:PFS,
  author =       "Dionysios Diamantopoulos and Efstathios
                 Sotiriou-Xanthopoulos and Kostas Siozios and George
                 Economakos and Dimitrios Soudris",
  title =        "{Plug\&Chip}: a Framework for Supporting Rapid
                 Prototyping of {$3$D} Hybrid Virtual {SoCs}",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "168:1--168:??",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2661634",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jan 7 15:03:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In the embedded system domain there is a continuous
                 demand towards providing higher flexibility for
                 application development. This trend strives for virtual
                 prototyping solutions capable of performing fast system
                 simulation. Among other benefits, such a solution
                 supports concurrent hardware/software system design by
                 enabling to start developing, testing, and validating
                 the embedded software substantially earlier than has
                 been possible in the past. Towards this direction,
                 throughout this article we introduce a new framework,
                 named Plug\&Chip, targeting to support rapid
                 prototyping of 2D and 3D digital systems. In contrast
                 to other relevant approaches, our solution provides
                 higher flexibility by enabling incremental system
                 design, while also handling platforms developed with
                 the usage of 3D integration technology.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "168",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Siozios:2014:FSA,
  author =       "Kostas Siozios and Dimitrios Soudris and Michael
                 H{\"u}bner",
  title =        "A Framework for Supporting Adaptive Fault-Tolerant
                 Solutions",
  journal =      j-TECS,
  volume =       "13",
  number =       "5s",
  pages =        "169:1--169:??",
  month =        nov,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629473",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jan 7 15:03:31 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "For decades, computer architects pursued one primary
                 goal: performance. The ever-faster transistors provided
                 by Moore's law were translated into remarkable gains in
                 operation frequency and power consumption. However, the
                 device-level size and architecture complexity impose
                 several new challenges, including a decrease in
                 dependability level due to physical failures. In this
                 article we propose a software-supported methodology
                 based on game theory for adapting the aggressiveness of
                 fault tolerance at runtime. Experimental results prove
                 the efficiency of our solution since it achieves
                 comparable fault masking to relevant solutions, but
                 with significantly lower mitigation cost. More
                 specifically, our framework speeds up the
                 identification of suspicious failure resources on
                 average by 76\% as compared to the HotSpot tool.
                 Similarly, the introduced solution leads to average
                 Power$ \times $Delay (PDP) savings against an existing
                 TMR approach by 53\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "169",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2015:ERS,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Regular, Special, and Related Issues",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2698230",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bishnoi:2015:BCC,
  author =       "Rimpy Bishnoi and Vijay Laxmi and Manoj Singh Gaur and
                 Jos{\'e} Flich and Francisco Trivi{\~n}o",
  title =        "A Brief Comment on {``A Complete Self-Testing and
                 Self-Configuring NoC Infrastructure for Cost-Effective
                 MPSoCs'' [ACM Transactions on Embedded Computing
                 Systems {\bf 12} (2013) Article 106]}",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2668121",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  note =         "See \cite{Ghiribaldi:2013:CST}.",
  abstract =     "In the Ghiribaldi et al. [2013] paper, a complete
                 self-testing and self-configuring NoC infrastructure
                 for cost-effective MPSoCs was presented in order to
                 make NoC architecture tolerant to faults. To overcome
                 the complexity involved during the complete
                 reconfiguration of routing instances in the face of
                 most of the usual failure patterns, Ghiribaldi et al.
                 [2013] proposed a fast self-reconfiguration algorithm.
                 The algorithm is based on segment-based routing
                 implemented using Logic-Based Distributed Routing
                 (LBDR) and claimed to have handled the most common NoC
                 faults. The purpose of this comment is to demonstrate
                 the inconsistency of the fast self-configuration method
                 presented in Ghiribaldi et al. [2013]. To handle
                 inconsistency, we present the correct set of LBDR bits
                 and also argue that complete reconfiguration of the
                 routing instance is mandatory to handle some fault
                 combinations. New coverage results of the fast
                 self-reconfiguration algorithm of Ghiribaldi et al.
                 [2013] are also presented.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Munir:2015:MAF,
  author =       "Arslan Munir and Joseph Antoon and Ann Gordon-Ross",
  title =        "Modeling and Analysis of Fault Detection and Fault
                 Tolerance in Wireless Sensor Networks",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2680538",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Technological advancements in communications and
                 embedded systems have led to the proliferation of
                 Wireless Sensor Networks (WSNs) in a wide variety of
                 application domains. These application domains include
                 but are not limited to mission-critical (e.g.,
                 security, defense, space, satellite) or safety-related
                 (e.g., health care, active volcano monitoring) systems.
                 One commonality across all WSN application domains is
                 the need to meet application requirements (e.g.,
                 lifetime, reliability). Many application domains
                 require that sensor nodes be deployed in harsh
                 environments, such as on the ocean floor or in an
                 active volcano, making these nodes more prone to
                 failures. Sensor node failures can be catastrophic for
                 critical or safety-related systems. This article models
                 and analyzes fault detection and fault tolerance in
                 WSNs. To determine the effectiveness and accuracy of
                 fault detection algorithms, we simulate these
                 algorithms using ns-2. We investigate the synergy
                 between fault detection and fault tolerance and use the
                 fault detection algorithms' accuracies in our modeling
                 of Fault-Tolerant (FT) WSNs. We develop Markov models
                 for characterizing WSN reliability and Mean Time to
                 Failure (MTTF) to facilitate WSN application-specific
                 design. Results obtained from our FT modeling reveal
                 that an FT WSN composed of duplex sensor nodes can
                 result in as high as a 100\% MTTF increase and
                 approximately a 350\% improvement in reliability over a
                 Non-Fault-Tolerant (NFT) WSN. The article also
                 highlights future research directions for the design
                 and deployment of reliable and trustworthy WSNs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sarkar:2015:STP,
  author =       "Abhik Sarkar and Frank Mueller and Harini Ramaprasad",
  title =        "Static Task Partitioning for Locked Caches in
                 Multicore Real-Time Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2638557",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Growing processing demand on multitasking real-time
                 systems can be met by employing scalable multicore
                 architectures. For such environments, locking cache
                 lines for hard real-time systems ensures timing
                 predictability of data references and may lower
                 worst-case execution time. This work studies the
                 benefits of cache locking on massive multicore
                 architectures with private caches in the context of
                 hard real-time systems. In shared cache architectures,
                 the cache is a single resource shared among all of the
                 tasks. However, in scalable cache architectures with
                 private caches, conflicts exist only among the tasks
                 scheduled on one core. This calls for a cache-aware
                 allocation of tasks onto cores. The objective of this
                 work is to increase the predictability of memory
                 accesses resolved by caches while reducing the number
                 of cores for a given task set. This allows designers to
                 reduce the footprint of their subsystem of real-time
                 tasks and thereby cost, either by choosing a product
                 with fewer cores as a target or to allow more
                 subsystems to be co-located on a given fixed number of
                 cores. Our work proposes a novel variant of the
                 cache-unaware First Fit Decreasing (FFD) algorithm
                 called Naive locked First Fit Decreasing (NFFD) policy.
                 We propose two cache-aware static scheduling schemes:
                 (a) Greedy First Fit Decreasing (GFFD) and (b) Colored
                 First Fit Decreasing (CoFFD) for task sets where tasks
                 do not have intratask conflicts among locked regions
                 (Scenario A). NFFD is capable of scheduling high
                 utilization task sets that FFD cannot schedule.
                 Experiments also show that CoFFD consistently
                 outperforms GFFD, resulting in a lower number of cores
                 and lower system utilization. CoFFD reduces the number
                 of core requirements by 30\% to 60\% compared to NFFD.
                 For a more generic case where tasks have intratask
                 conflicts, we split the task partitioning between two
                 phases: task selection and task allocation (Scenario
                 B). Instead of resolving conflicts at a global level,
                 these algorithms resolve conflicts among regions while
                 allocating a task onto a core and unlocking at region
                 level instead of task level. We show that a combination
                 of dynamic ordering (task selection) with Chaitin's
                 Coloring (task allocation) scheme reduces the number of
                 cores required by up to 22\% over a basic scheme (in a
                 combination of monotone ordering and regional FFD).
                 Regional unlocking allows this scheme to outperform
                 CoFFD for medium utilization task sets from Scenario A.
                 However, CoFFD performs better than any other scheme
                 for high utilization task sets from Scenario A.
                 Overall, this work is unique in considering the
                 challenges of future multicore architectures for
                 real-time systems and provides key insights into task
                 partitioning and cache-locking mechanisms for
                 architectures with private caches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tillenius:2015:RAT,
  author =       "Martin Tillenius and Elisabeth Larsson and Rosa M.
                 Badia and Xavier Martorell",
  title =        "Resource-Aware Task Scheduling",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2638554",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dependency-aware task-based parallel programming
                 models have proven to be successful for developing
                 efficient application software for multicore-based
                 computer architectures. The programming model is
                 amenable to programmers, thereby supporting
                 productivity, whereas hardware performance is achieved
                 through a runtime system that dynamically schedules
                 tasks onto cores in such a way that all dependencies
                 are respected. However, even if the scheduling is
                 completely successful with respect to load balancing,
                 the scaling with the number of cores may be suboptimal
                 due to resource contention. Here we consider the
                 problem of scheduling tasks not only with respect to
                 their interdependencies but also with respect to their
                 usage of resources, such as memory and bandwidth. At
                 the software level, this is achieved by user
                 annotations of the task resource consumption. In the
                 runtime system, the annotations are translated into
                 scheduling constraints. Experimental results for
                 different hardware, demonstrating performance gains
                 both for model examples and real applications, are
                 presented. Furthermore, we provide a set of tools to
                 detect resource sensitivity and predict the performance
                 improvements that can be achieved by resource-aware
                 scheduling. These tools are solely based on parallel
                 execution traces and require no instrumentation or
                 modification of the application code.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2015:JWU,
  author =       "Yazhi Huang and Mengying Zhao and Chun Jason Xue",
  title =        "Joint {WCET} and Update Activity Minimization for
                 Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2680539",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A cyber-physical system (CPS) is a desirable computing
                 platform for many industrial and scientific
                 applications, such as industrial process monitoring,
                 environmental monitoring, chemical processes, and
                 battlefield surveillance. The application of CPSs has
                 two challenges: First, CPSs often include a number of
                 sensor nodes. Update of preloaded code on remote sensor
                 nodes powered by batteries is extremely energy
                 consuming. The code update issue in the
                 energy-sensitive CPS must be carefully considered.
                 Second, CPSs are often real-time embedded systems with
                 real-time properties. Worst-case execution time (WCET)
                 is one of the most important metrics in real-time
                 system design. Whereas existing works only consider one
                 of these two challenges at a time, in this article, a
                 compiler optimization---joint WCET and update-conscious
                 compilation, or WUCC---is proposed to jointly consider
                 WCET and code update for CPSs. The novelty of the
                 proposed approach is that the WCET problem and code
                 update problem are considered concurrently such that a
                 balanced solution with minimal WCET and minimal code
                 difference can be achieved. The experimental results
                 show that the proposed technique can minimize WCET and
                 code difference effectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bertozzi:2015:PRA,
  author =       "Davide Bertozzi and Stefano {Di Carlo} and Salvatore
                 Galfano and Marco Indaco and Piero Olivo and Paolo
                 Prinetto and Cristian Zambelli",
  title =        "Performance and Reliability Analysis of Cross-Layer
                 Optimizations of {NAND} Flash Controllers",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629562",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "NAND flash memories are becoming the predominant
                 technology in the implementation of mass storage
                 systems for both embedded and high-performance
                 applications. However, when considering data and code
                 storage in Non-Volatile Memories (NVMs), such as NAND
                 flash memories, reliability and performance become a
                 serious concern for systems designers. Designing NAND
                 flash-based systems based on worst-case scenarios leads
                 to waste of resources in terms of performance, power
                 consumption, and storage capacity. This is clearly in
                 contrast with the request for runtime
                 reconfigurability, adaptivity, and resource
                 optimization in modern computing systems. There is a
                 clear trend toward supporting differentiated access
                 modes in flash memory controllers, each one setting a
                 differentiated tradeoff point in the
                 performance-reliability optimization space. This is
                 supported by the possibility of tuning the NAND flash
                 memory performance, reliability, and power consumption
                 through several tuning knobs such as the flash
                 programming algorithm and the flash error correcting
                 code. However, to successfully exploit these degrees of
                 freedom, it is mandatory to clearly understand the
                 effect that the combined tuning of these parameters has
                 on the full NVM subsystem. This article performs a
                 comprehensive quantitative analysis of the benefits
                 provided by the runtime reconfigurability of an MLC
                 NAND flash controller through the combined effect of an
                 adaptable memory programming circuitry coupled with
                 runtime adaptation of the ECC correction capability.
                 The full NVM subsystem is taken into account, starting
                 from a characterization of the low-level circuitry to
                 the effect of the adaptation on a wide set of realistic
                 benchmarks in order to provide readers a clear view of
                 the benefit this combined adaptation may provide at the
                 system level.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lin:2015:SLP,
  author =       "Ye-Jyun Lin and Chia-Lin Yang and Jiao-We Huang and
                 Tay-Jyi Lin and Chih-Wen Hsueh and Naehyuck Chang",
  title =        "System-Level Performance and Power Optimization for
                 {MPSoC}: a Memory Access-Aware Approach",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2656339",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As the number of IPs in a multimedia Multi-Processor
                 System-on-Chip (MPSoC) continues to increase,
                 concurrent memory accesses from different IPs
                 increasingly stress memory systems, which presents both
                 opportunities and challenges for future MPSoC design.
                 The impact of such requirements on the system-level
                 design for MPSoC is twofold. First, contention among
                 IPs prolongs memory access time, which exacerbates the
                 persisting memory wall problem. Second, longer memory
                 accesses lead to longer IP stall time, which results in
                 unnecessary leakage waste. In this article, we propose
                 two memory access-aware system-level design approaches
                 for performance and leakage optimization. To alleviate
                 the memory wall problem, we propose a Hierarchical
                 Memory Scheduling (HMS) policy that schedules memory
                 requests from the same IP and application consecutively
                 to reduce interference among memory accesses from
                 different IPs with a fairness guarantee. To reduce IP
                 leakage waste due to long memory access, we propose a
                 memory access-aware power-gating policy. A
                 straightforward power-gating approach is to power gate
                 an IP when it needs to fetch data from memory. However,
                 due to the response time variation among memory
                 accesses, aggressively power gating an IP whenever a
                 memory request occurs may result in incorrect
                 power-gating decisions. The proposed memory
                 access-aware power-gating policy makes these decisions
                 judiciously, based on the predicted memory latency of
                 an individual IP and its energy breakeven time. The
                 experimental results show that the proposed HMS memory
                 scheduling policy improves system throughput by 42\%
                 compared to First-Come-First-Serve (FCFS) and by 21\%
                 compared to First-Ready First-Come-First-Serve
                 (FR-FCFS) on an MPSoC for mobile phones. For the
                 improvement of fairness, HMS improves fairness by 1.52$
                 \times $ compared to FCFS and by 1.23$ \times $
                 compared to FRFCFS. In the aspect of leakage
                 optimization, our memory access-aware power-gating
                 mechanism improves energy savings by 3.88$ \times $ and
                 reduces the performance penalty by 70\% compared to
                 conventional timeout-based power gating. We further
                 demonstrate that our HMS memory scheduler can regulate
                 memory access orders, thereby reducing memory response
                 time variation. This leads to more accurate power-down
                 decisions for both conventional timeout power gating
                 and the proposed memory access-aware power gating.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Borgstrom:2015:PCW,
  author =       "Johannes Borgstr{\"o}m and Ram{\=u}nas Gutkovas and
                 Ioana Rodhe and Bj{\"o}rn Victor",
  title =        "The Psi-Calculi Workbench: a Generic Tool for Applied
                 Process Calculi",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2682570",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Psi-calculi is a parametric framework for extensions
                 of the pi-calculus with arbitrary data and logic. All
                 instances of the framework inherit machine-checked
                 proofs of the metatheory such as compositionality and
                 bisimulation congruence. We present a generic analysis
                 tool for psi-calculus instances, enabling symbolic
                 execution and (bi)simulation checking for both unicast
                 and broadcast communication. The tool also provides a
                 library for implementing new psi-calculus instances. We
                 provide examples from traditional communication
                 protocols and wireless sensor networks. We also
                 describe the theoretical foundations of the tool,
                 including an improved symbolic operational semantics,
                 with additional support for scoped broadcast
                 communication.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{VanHulst:2015:MSH,
  author =       "A. C. {Van Hulst} and M. A. Reniers and W. J.
                 Fokkink",
  title =        "Maximal Synthesis for {Hennessy--Milner} Logic",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2680540",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article concerns the maximal synthesis for
                 Hennessy--Milner Logic on Kripke structures with
                 labeled transitions. We formally define, and prove the
                 validity of, a theoretical framework that modifies a
                 Kripke model to the least possible extent in order to
                 satisfy a given HML formula. Applications of this work
                 can be found in the field of controller synthesis and
                 supervisory control for discrete-event systems.
                 Synthesis is realized technically by first projecting
                 the given Kripke model onto a bisimulation-equivalent
                 partial tree representation, thereby unfolding up to
                 the depth of the synthesized formula. Operational rules
                 then define the required adaptations upon this
                 structure in order to achieve validity of the
                 synthesized formula. Synthesis might result in multiple
                 valid adaptations, which are all related to the
                 original model via simulation. Each simulant of the
                 original Kripke model, which satisfies the synthesized
                 formula, is also related to one of the synthesis
                 results via simulation. This indicates maximality, or
                 maximal permissiveness, in the context of supervisory
                 control. In addition to the formal construction of
                 synthesis as presented in this article, we present it
                 in algorithmic form and analyze its computational
                 complexity. Computer-verified proofs for two important
                 theorems in this article have been created using the
                 Coq proof assistant.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Boucheneb:2015:SST,
  author =       "Hanifa Boucheneb and Kamel Barkaoui",
  title =        "Stubborn Sets for Time {Petri} Nets",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2680541",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The main limitation of the verification approaches
                 based on state enumeration is the state explosion
                 problem. The partial order reduction techniques aim at
                 attenuating this problem by reducing the number of
                 transitions to be fired from each state while
                 preserving properties of interest. Among the reduction
                 techniques proposed in the literature, this article
                 considers the stubborn set method of Petri nets and
                 investigates its extension to time Petri nets. It
                 establishes some useful sufficient conditions for
                 stubborn sets, which preserve deadlocks and
                 k-boundedness of places.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pan:2015:HFY,
  author =       "Abhisek Pan and Rance Rodrigues and Sandip Kundu",
  title =        "A Hardware Framework for Yield and Reliability
                 Enhancement in Chip Multiprocessors",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629688",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Device reliability and manufacturability have emerged
                 as dominant concerns in end-of-road CMOS devices. An
                 increasing number of hardware failures are attributed
                 to manufacturability or reliability problems.
                 Maintaining an acceptable manufacturing yield for chips
                 containing tens of billions of transistors with wide
                 variations in device parameters has been identified as
                 a great challenge. Additionally, today's nanometer
                 scale devices suffer from accelerated aging effects
                 because of the extreme operating temperature and
                 electric fields they are subjected to. Unless addressed
                 in design, aging-related defects can significantly
                 reduce the lifetime of a product. In this article, we
                 investigate a micro-architectural scheme for improving
                 yield and reliability of homogeneous chip
                 multiprocessors (CMPs). The proposed solution involves
                 a hardware framework that enables us to utilize the
                 redundancies inherent in a multicore system to keep the
                 system operational in the face of partial failures. A
                 micro-architectural modification allows a faulty core
                 in a CMP to use another core's resources to service any
                 instruction that the former cannot execute correctly by
                 itself. This service improves yield and reliability but
                 may cause loss of performance. The target platform for
                 quantitative evaluation of performance under
                 degradation is a dual-core and a quad-core chip
                 multiprocessor with one or more cores sustaining
                 partial failure. Simulation studies indicate that when
                 a large, high-latency, and sparingly used unit such as
                 a floating-point unit fails in a core, correct
                 execution may be sustained through outsourcing with at
                 most a 16\% impact on performance for a floating-point
                 intensive application. For applications with moderate
                 floating-point load, the degradation is insignificant.
                 The performance impact may be mitigated even further by
                 judicious selection of the cores to commandeer
                 depending on the current load on each of the candidate
                 cores. The area overhead is also negligible due to
                 resource reuse.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lazarescu:2015:ITB,
  author =       "Mihai T. Lazarescu and Luciano Lavagno",
  title =        "Interactive Trace-Based Analysis Toolset for Manual
                 Parallelization of {C} Programs",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2638556",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Massive amounts of legacy sequential code need to be
                 parallelized to make better use of modern
                 multiprocessor architectures. Nevertheless, writing
                 parallel programs is still a difficult task. Automated
                 parallelization methods can be effective both at the
                 statement and loop levels and, recently, at the task
                 level, but they are still restricted to specific source
                 code constructs or application domains. We present in
                 this article an innovative toolset that supports
                 developers when performing manual code analysis and
                 parallelization decisions. It automatically collects
                 and represents the program profile and data
                 dependencies in an interactive graphical format that
                 facilitates the analysis and discovery of manual
                 parallelization opportunities. The toolset can be used
                 for arbitrary sequential C programs and parallelization
                 patterns. Also, its program-scope data dependency
                 tracing at runtime can complement the tools based on
                 static code analysis and can also benefit from it at
                 the same time. We also tested the effectiveness of the
                 toolset in terms of time to reach parallelization
                 decisions and of their quality. We measured a
                 significant improvement for several real-world
                 representative applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Quan:2015:HTM,
  author =       "Wei Quan and Andy D. Pimentel",
  title =        "A Hybrid Task Mapping Algorithm for Heterogeneous
                 {MPSoCs}",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2680542",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The application workloads in modern MPSoC-based
                 embedded systems are becoming increasingly dynamic.
                 Different applications concurrently execute and contend
                 for resources in such systems, which could cause
                 serious changes in the intensity and nature of the
                 workload demands over time. To cope with the dynamism
                 of application workloads at runtime and improve the
                 efficiency of the underlying system architecture, this
                 article presents a hybrid task mapping algorithm that
                 combines a static mapping exploration and a dynamic
                 mapping optimization to achieve an overall improvement
                 of system efficiency. We evaluate our algorithm using a
                 heterogeneous MPSoC system with three real
                 applications. Experimental results reveal the
                 effectiveness of our proposed algorithm by comparing
                 derived solutions to the ones obtained from several
                 other runtime mapping algorithms. In test cases with
                 three simultaneously active applications, the mapping
                 solutions derived by our approach have average
                 performance improvements ranging from 45.9\% to 105.9\%
                 and average energy savings ranging from 14.6\% to
                 23.5\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Petrucci:2015:EET,
  author =       "Vinicius Petrucci and Orlando Loques and Daniel
                 Moss{\'e} and Rami Melhem and Neven {Abou Gazala} and
                 Sameh Gobriel",
  title =        "Energy-Efficient Thread Assignment Optimization for
                 Heterogeneous Multicore Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2566618",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The current trend to move from homogeneous to
                 heterogeneous multicore systems provides compelling
                 opportunities for achieving performance and energy
                 efficiency goals. Running multiple threads in multicore
                 systems poses challenges on meeting limited shared
                 resources, such as memory bandwidth. We propose an
                 optimization approach that includes an Integer Linear
                 Programming (ILP) optimization model and a scheme to
                 dynamically determine thread-to-core assignment. We
                 present simulation analysis that shows energy savings
                 and performance gains for a variety of workloads
                 compared to state-of-the-art schemes. We implemented
                 and evaluated a prototype of our thread assignment
                 approach at user level, leveraging Linux scheduling and
                 performance-monitoring capabilities.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yang:2015:ESV,
  author =       "Zhengfeng Yang and Wang Lin and Min Wu",
  title =        "Exact Safety Verification of Hybrid Systems Based on
                 Bilinear {SOS} Representation",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629424",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we address the problem of safety
                 verification of nonlinear hybrid systems. A hybrid
                 symbolic-numeric method is presented to compute exact
                 inequality invariants of hybrid systems efficiently.
                 Some numerical invariants of a hybrid system can be
                 obtained by solving a bilinear SOS programming via the
                 PENBMI solver or iterative method, then the modified
                 Newton refinement and rational vector recovery
                 techniques are applied to obtain exact polynomial
                 invariants with rational coefficients, which exactly
                 satisfy the conditions of invariants. Experiments on
                 some benchmarks are given to illustrate the efficiency
                 of our algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rodrigues:2015:DSE,
  author =       "Rance Rodrigues and Israel Koren and Sandip Kundu",
  title =        "Does the Sharing of Execution Units Improve
                 Performance\slash Power of Multicores?",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2680543",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Several studies and recent real-world designs have
                 promoted sharing of underutilized resources between
                 cores in a multicore processor to achieve better
                 performance/power. It has been argued that when
                 utilization of such resources is low, sharing has a
                 negligible impact on performance while offering
                 considerable area and power benefits. In this article,
                 we investigate the performance and performance/watt
                 implications of sharing large and underutilized
                 resources between pairs of cores in a multicore. We
                 first study sharing of the entire floating-point
                 datapath (including reservation stations and execution
                 units) by two cores, similar to AMD's Bulldozer. We
                 find that while this architecture results in power
                 savings for certain workload combinations, it also
                 results in significant performance loss of up to 28\%.
                 Next, we study an alternative sharing architecture
                 where only the floating-point execution units are
                 shared, while the individual cores retain their
                 reservation stations. This reduces the highest
                 performance loss to 14\%. We then extend the study to
                 include sharing of other large execution units that are
                 used infrequently, namely, the integer multiply and
                 divide units. Subsequently, we analyze the impact of
                 sharing hardware resources in Simultaneously
                 Multithreaded (SMT) processors where multiple threads
                 run concurrently on the same core. We observe that
                 sharing improves performance/watt at a negligible
                 performance cost only if the shared units have high
                 throughput. Sharing low-throughput units reduces both
                 performance and performance/watt. To increase the
                 throughput of the shared units, we propose the use of
                 Dynamic Voltage and Frequency Boosting (DVFB) of only
                 the shared units that can be placed on a separate
                 voltage island. Our results indicate that the use of
                 DVFB improves both performance and performance/watt by
                 as much as 22\% and 10\%, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Diamantopoulos:2015:GPA,
  author =       "Dionysios Diamantopoulos and Kostas Siozios and
                 Sotirios Xydis and Dimitrios Soudris",
  title =        "{GENESIS}: Parallel Application Placement onto
                 Reconfigurable Architectures (Invited for the Special
                 Issue on Runtime Management)",
  journal =      j-TECS,
  volume =       "14",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629651",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 22 06:25:23 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Placement is though as the most time-consuming
                 processes in physical implementation flows for
                 reconfigurable architectures, while it highly affects
                 the quality of derived application implementation, as
                 it has impact on the maximum operating frequency.
                 Throughout this article, we propose a novel placer,
                 based on genetic algorithm, targeting to FPGAs. Rather
                 than relevant approaches, which are executed
                 sequentially, the new placer exhibits inherent
                 parallelism, which can benefit from multicore
                 processors. Experimental results prove the
                 effectiveness of this solution, as it achieves average
                 reduction of execution runtime and application's delay
                 by 67$ \times $ and 16\%, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Pager:2015:SSM,
  author          = {Jared Pager and Reiley Jeyapaul and Aviral
                     Shrivastava},
  title           = {A Software Scheme for Multithreading on {CGRAs}},
  journal         = j-TECS,
  volume          = {14},
  number          = {1},
  pages           = {19:1--19:??},
  month           = jan,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2638558},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Jan 22 06:25:23 MST 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Recent industry trends show a drastic rise in the use
                     of hand-held embedded devices, from everyday
                     applications to medical (e.g., monitoring devices) and
                     critical defense applications (e.g., sensor nodes). The
                     two key requirements in the design of such devices are
                     their processing capabilities and battery life. There
                     is therefore an urgency to build high-performance and
                     power-efficient embedded devices, inspiring researchers
                     to develop novel system designs for the same. The use
                     of a coprocessor (application-specific hardware) to
                     offload power-hungry computations is gaining favor
                     among system designers to suit their power budgets. We
                     propose the use of CGRAs (Coarse-Grained Reconfigurable
                     Arrays) as a power-efficient coprocessor. Though CGRAs
                     have been widely used for streaming applications, the
                     extensive compiler support required limits its
                     applicability and use as a general purpose coprocessor.
                     In addition, a CGRA structure can efficiently execute
                     only one statically scheduled kernel at a time, which
                     is a serious limitation when used as an accelerator to
                     a multithreaded or multitasking processor. In this
                     work, we envision a multithreaded CGRA where multiple
                     schedules (or kernels) can be executed simultaneously
                     on the CGRA (as a coprocessor). We propose a
                     comprehensive software scheme that transforms the
                     traditionally single-threaded CGRA into a multithreaded
                     coprocessor to be used as a power-efficient accelerator
                     for multithreaded embedded processors. Our software
                     scheme includes (1) a compiler framework that
                     integrates with existing CGRA mapping techniques to
                     prepare kernels for execution on the multithreaded CGRA
                     and (2) a runtime mechanism that dynamically schedules
                     multiple kernels (offloaded from the processor) to
                     execute simultaneously on the CGRA coprocessor. Our
                     multithreaded CGRA coprocessor implementation thus
                     makes it possible to achieve improved power-efficient
                     computing in modern multithreaded embedded systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {19},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Shukla:2015:EOS,
  author          = {Sandeep K. Shukla},
  title           = {Editorial: Oh Security --- Where Art Thou?},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {20:1--20:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2742044},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Rafiliu:2015:SOR,
  author          = {Sergiu Rafiliu and Petru Eles and Zebo Peng and
                     Michael Lemmon},
  title           = {Stability of Online Resource Managers for Distributed
                     Systems under Execution Time Variations},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {21:1--21:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2629495},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Today's embedded systems are exposed to variations in
                     resource usage due to complex software applications,
                     hardware platforms, and impact of the runtime
                     environments. When these variations are large and
                     efficiency is required, on-line resource managers may
                     be deployed on the system to help it control its
                     resource usage. An often neglected problem is whether
                     these resource managers are stable, meaning that the
                     resource usage is controlled under all possible
                     scenarios. In distributed systems, this problem is
                     particularly hard because applications distributed over
                     many resources generate complex dependencies between
                     their resources. In this article, we develop a
                     mathematical model of the system, and derive conditions
                     that, if satisfied, guarantee stability.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {21},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Loke:2015:MCS,
  author          = {Seng W. Loke and Keegan Napier and Abdulaziz Alali and
                     Niroshinie Fernando and Wenny Rahayu},
  title           = {Mobile Computations with Surrounding Devices:
                     Proximity Sensing and {MultiLayered} Work Stealing},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {22:1--22:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2656214},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With the proliferation of mobile devices, and their
                     increasingly powerful embedded processors and storage,
                     vast resources increasingly surround users. We have
                     been investigating the concept of on-demand ad hoc
                     forming of groups of nearby mobile devices in the midst
                     of crowds to cooperatively perform computationally
                     intensive tasks as a service to local mobile users, or
                     what we call mobile crowd computing. As devices can
                     vary in processing power and some can leave a group
                     unexpectedly or new devices join in, there is a need
                     for algorithms that can distribute work in a flexible
                     manner and still work with different arrangements of
                     devices that can arise in an ad hoc fashion. In this
                     article, we first argue for the feasibility of such use
                     of crowd-embedded computations using theoretical
                     justifications and reporting on our experiments on
                     Bluetooth-based proximity sensing. We then present a
                     multilayered work-stealing style algorithm for
                     distributing work efficiently among mobile devices and
                     compare speedups attainable for different topologies of
                     devices networked with Bluetooth, justifying a
                     topology-flexible opportunistic approach. While our
                     experiments are with Bluetooth and mobile devices, the
                     approach is applicable to ecosystems of various
                     embedded devices with powerful processors, networking
                     technologies, and storage that will increasingly
                     surround users.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Malik:2015:HRT,
  author          = {Avinash Malik and David Gregg},
  title           = {Heuristics on Reachability Trees for Bicriteria
                     Scheduling of Stream Graphs on Heterogeneous
                     Multiprocessor Architectures},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {23:1--23:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2638553},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we partition and schedule Synchronous
                     Dataflow (SDF) graphs onto heterogeneous execution
                     architectures in such a way as to minimize energy
                     consumption and maximize throughput. Partitioning and
                     scheduling SDF graphs onto homogeneous architectures is
                     a well-known NP-hard problem. The heterogeneity of the
                     execution architecture makes our problem exponentially
                     challenging to solve. We model the problem as a
                     weighted sum and solve it using novel state space
                     exploration inspired from the theory of parallel
                     automata. The resultant heuristic algorithm results in
                     good scheduling when implemented in an existing stream
                     framework.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {23},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Martin:2015:ROS,
  author =       "Paul Martin and Lucas Wanner and Mani Srivastava",
  title =        "Runtime Optimization of System Utility with Variable
                 Hardware",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "24:1--24:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2656338",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Increasing hardware variability in newer integrated
                 circuit fabrication technologies has caused
                 corresponding power variations on a large scale. These
                 variations are particularly exaggerated for idle power
                 consumption, motivating the need to mitigate the
                 effects of variability in systems whose operation is
                 dominated by long idle states with periodic active
                 states. In systems where computation is severely
                 limited by anemic energy reserves and where a long
                 overall system lifetime is desired, maximizing the
                 quality of a given application subject to these
                 constraints is both challenging and an important step
                 toward achieving high-quality deployments. This work
                 describes VaRTOS, an architecture and corresponding set
                 of operating system abstractions that provide explicit
                 treatment of both idle and active power variations for
                 tasks running in real-time operating systems. Tasks in
                 VaRTOS express elasticity by exposing individual knobs
                 --- shared variables that the operating system can tune
                 to adjust task quality and, correspondingly, task power,
                 maximizing application utility both on a per-task and
                 on a system-wide basis. We provide results regarding
                 online learning of instance-specific sleep power,
                 active power, and task-level power expenditure on
                 simulated hardware with demonstrated effects for
                 several prototypical applications. Our results on
                 networked sensing applications, which are
                 representative of a broader category of applications
                 that VaRTOS targets, show that VaRTOS can reduce
                 variability-induced energy expenditure errors from over
                 70\% in many cases to under 2\% in most cases and under
                 5\% in the worst case.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Gomony:2015:RTM,
  author          = {Manil Dev Gomony and Benny Akesson and Kees Goossens},
  title           = {A Real-Time Multichannel Memory Controller and Optimal
                     Mapping of Memory Clients to Memory Channels},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {25:1--25:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2661635},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Ever-increasing demands for main memory bandwidth and
                     memory speed/power tradeoff led to the introduction of
                     memories with multiple memory channels, such as Wide IO
                     DRAM. Efficient utilization of a multichannel memory as
                     a shared resource in multiprocessor real-time systems
                     depends on mapping of the memory clients to the memory
                     channels according to their requirements on latency,
                     bandwidth, communication, and memory capacity. However,
                     there is currently no real-time memory controller for
                     multichannel memories, and there is no methodology to
                     optimally configure multichannel memories in real-time
                     systems. As a first work toward this direction, we
                     present two main contributions in this article: (1) a
                     configurable real-time multichannel memory controller
                     architecture with a novel method for
                     logical-to-physical address translation and (2) two
                     design-time methods to map memory clients to the memory
                     channels, one an optimal algorithm based on an integer
                     programming formulation of the mapping problem, and the
                     other a fast heuristic algorithm. We demonstrate the
                     real-time guarantees on bandwidth and latency provided
                     by our multichannel memory controller architecture by
                     experimental evaluation. Furthermore, we compare the
                     performance of the mapping problem formulation in a
                     solver and the heuristic algorithm against two existing
                     mapping algorithms in terms of computation time and
                     mapping success ratio. We show that an optimal solution
                     can be found in 2 hours using the solver and in less
                     than 1 second with less than 7\% mapping failure using
                     the heuristic for realistically sized problems.
                     Finally, we demonstrate configuring a Wide IO DRAM in a
                     high-definition (HD) video and graphics processing
                     system to emphasize the practical applicability and
                     effectiveness of this work.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {25},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Jezequel:2015:FPA,
  author          = {Lo{\"\i}g Jezequel and Eric Fabre and Victor
                     Khomenko},
  title           = {Factored Planning: From Automata to {Petri} Nets},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {26:1--26:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2656215},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Factored planning mitigates the state explosion
                     problem by avoiding the construction of the state space
                     of the whole system and instead working with the
                     system's components. Traditionally, finite automata
                     have been used to represent the components, with the
                     overall system being represented as their product. In
                     this article, we change the representation of
                     components to safe Petri nets. This allows one to use
                     cheap structural operations like transition
                     contractions to reduce the size of the Petri net before
                     its state space is generated, which often leads to
                     substantial savings compared with automata. The
                     proposed approach has been implemented and proved
                     efficient on several factored planning benchmarks. This
                     article is an extended version of our ACSD 2013 paper
                     [Jezequel et al. 2013], with the addition of the proofs
                     and the experimental results of Sections 6 and 7.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {26},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Taniuchi:2015:AUI,
  author          = {Daisuke Taniuchi and Takuya Maekawa},
  title           = {Automatic Update of Indoor Location Fingerprints with
                     Pedestrian Dead Reckoning},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {27:1--27:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2667226},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we propose a new method for
                     automatically updating a Wi-Fi indoor positioning model
                     on a cloud server by employing uploaded sensor data
                     obtained from the smartphone sensors of a specific user
                     who spends a lot of time in a given environment (e.g.,
                     a worker in the environment). In this work, we attempt
                     to track the user with pedestrian dead reckoning
                     techniques, and at the same time we obtain Wi-Fi scan
                     data from a mobile device possessed by the user. With
                     the scan data and the estimated coordinates uploaded to
                     a cloud server, we can automatically create a pair
                     consisting of a scan and its corresponding indoor
                     coordinates during the user's daily life and update an
                     indoor positioning model on the server by using the
                     information. With this approach, we try to cope with
                     the instability of Wi-Fi-based positioning methods
                     caused by changing environmental dynamics, that is,
                     layout changes and moving or removal of Wi-Fi access
                     points. Therefore, ordinary users (e.g., customers) who
                     do not have rich sensors can benefit from the
                     continually updating positioning model.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {27},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Jimenez:2015:LSC,
  author          = {Xavier Jimenez and David Novo and Paolo Ienne},
  title           = {{Libra}: Software-Controlled Cell Bit-Density to
                     Balance Wear in {NAND} Flash},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {28:1--28:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2638552},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Hybrid flash storages combine a small Single-Level
                     Cell (SLC) partition with a large Multilevel Cell (MLC)
                     partition. Compared to MLC-only solutions, the SLC
                     partition exploits fast and short local write updates,
                     while the MLC part brings large capacity. On the whole,
                     hybrid storage achieves a tangible performance
                     improvement for a moderate extra cost. Yet, device
                     lifetime is an important aspect often overlooked: in a
                     hybrid system, a large ratio of writes may be directed
                     to the small SLC partition, thus generating a local
                     stress that could exhaust the SLC lifetime
                     significantly sooner than the MLC partition's. To
                     address this issue, we propose Libra, which builds on
                     flash storage made solely of MLC flash and uses the
                     memory devices in SLC mode when appropriate; that is,
                     we exploit the fact that writing a single bit per cell
                     in an MLC provides characteristics close to those of an
                     ordinary SLC. In our scheme, the cell bit-density of a
                     block can be decided dynamically by the flash
                     controller, and the physical location of the SLC
                     partition can now be moved around the whole device,
                     balancing wear across it. This article provides a
                     thorough analysis and characterization of the SLC mode
                     for MLCs and gives evidence that the inherent
                     flexibility provided by Libra simplifies considerably
                     the stress balance on the device. Overall, our
                     technique improves lifetime by up to one order of
                     magnitude at no cost when compared to any hybrid
                     storage that relies on a static SLC-MLC partitioning.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {28},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Chang:2015:PVL,
  author          = {Li-Pin Chang and Yo-Chuan Su and I-Chen Wu},
  title           = {Plugging Versus Logging: Adaptive Buffer Management
                     for Hybrid-Mapping {SSDs}},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {29:1--29:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2629455},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {A promising technique to improve the write performance
                     of solid-state disks (SSDs) is to use a disk write
                     buffer. The goals of a write buffer is not only to
                     reduce the write traffic to the flash chips but also to
                     convert host write patterns into long and sequential
                     write bursts. This study proposes a new buffer design
                     consisting of a replacement policy and a write-back
                     policy. The buffer monitors how the host workload
                     stresses the flash translation layer upon garbage
                     collection. This is used to dynamically adjust its
                     replacement and write-back strategies for a good
                     balance between write sequentiality and write
                     randomness. When the garbage collection overhead is
                     low, the write buffer favors high write sequentiality
                     over low write randomness. When the flash translation
                     layer observes a high overhead of garbage collection,
                     the write buffer favors low write randomness over high
                     write sequentiality. The proposed buffer design
                     outperformed existing approaches by up to 20\% under
                     various workloads and flash translation algorithms, as
                     will be shown in experiment results.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {29},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Jia:2015:TAD,
  author          = {Zhiping Jia and Yang Li and Yi Wang and Meng Wang and
                     Zili Shao},
  title           = {Temperature-Aware Data Allocation for Embedded Systems
                     with Cache and Scratchpad Memory},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {30:1--30:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2629650},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The hybrid memory architecture that contains both
                     on-chip cache and scratchpad memory (SPM) has been
                     widely used in embedded systems. In this article, we
                     explore this hybrid memory architecture by jointly
                     optimizing time performance and temperature for
                     embedded systems with loops. Our basic idea is to
                     adaptively adjust the workload distribution between
                     cache and SPM based on the current temperature. For a
                     problem in which the workload can be estimated a
                     priori, we present a nonlinear programming formulation
                     to optimally minimize the total execution time of a
                     loop under the constraints of SPM size and temperature.
                     To solve a problem in which the workload is not known a
                     priori, we propose a temperature-aware adaptive loop
                     scheduling algorithm called TALS to dynamically
                     allocate data to cache and SPM at runtime. The
                     experimental results show that our algorithms can
                     effectively achieve both performance and temperature
                     optimization for embedded systems with cache and SPM.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {30},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Zhang:2015:MPA,
  author          = {Weihua Zhang and Jiaxin Li and Yi Li and Haibo Chen},
  title           = {Multilevel Phase Analysis},
  journal         = j-TECS,
  volume          = {14},
  number          = {2},
  pages           = {31:1--31:??},
  month           = mar,
  year            = {2015},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2629594},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Mar 26 05:58:56 MDT 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Phase analysis, which classifies the set of execution
                     intervals with similar execution behavior and resource
                     requirements, has been widely used in a variety of
                     systems, including dynamic cache reconfiguration,
                     prefetching, race detection, and sampling simulation.
                     Although phase granularity has been a major factor in
                     the accuracy of phase analysis, it has not been well
                     investigated, and most systems usually adopt a
                     fine-grained scheme. However, such a scheme can only
                     take account of recent local phase information and
                     could be frequently interfered by temporary noise due
                     to instant phase changes, which might notably limit the
                     accuracy. In this article, we make the first
                     investigation on the potential of multilevel phase
                     analysis (MLPA), where different granularity phase
                     analyses are combined together to improve the overall
                     accuracy. The key observation is that the
                     coarse-grained intervals belonging to the same phase
                     usually consist of stably distributed fine-grained
                     phases. Moreover, the phase of a coarse-grained
                     interval can be accurately identified based on the
                     fine-grained intervals at the beginning of its
                     execution. Based on the observation, we design and
                     implement an MLPA scheme. In such a scheme, a
                     coarse-grained phase is first identified based on the
                     fine-grained intervals at the beginning of its
                     execution. The following fine-grained phases in it are
                     then predicted based on the sequence of fine-grained
                     phases in the coarse-grained phase. Experimental
                     results show that such a scheme can notably improve the
                     prediction accuracy. Using a Markov fine-grained phase
                     predictor as the baseline, MLPA can improve prediction
                     accuracy by 20\%, 39\%, and 29\% for next phase, phase
                     change, and phase length prediction for SPEC2000,
                     respectively, yet incur only about 2\% time overhead
                     and 40\% space overhead (about 360 bytes in total). To
                     demonstrate the effectiveness of MLPA, we apply it to a
                     dynamic cache reconfiguration system that dynamically
                     adjusts the cache size to reduce the power consumption
                     and access time of the data cache. Experimental results
                     show that MLPA can further reduce the average cache
                     size by 15\% compared to the fine-grained scheme.
                     Moreover, for MLPA, we also observe that coarse-grained
                     phases can better capture the overall program
                     characteristics with fewer of phases and the last
                     representative phase could be classified in a very
                     early program position, leading to fewer execution
                     internals being functionally simulated. Based on this
                     observation, we also design a multilevel sampling
                     simulation technique that combines both fine- and
                     coarse-grained phase analysis for sampling simulation.
                     Such a scheme uses fine-grained simulation points to
                     represent only the selected coarse-grained simulation
                     points instead of the entire program execution; thus,
                     it could further reduce both the functional and
                     detailed simulation time. Experimental results show
                     that MLPA for sampling simulation can achieve a speedup
                     in simulation time of about 8.3X with similar accuracy
                     compared to 10M SimPoint.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {31},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Banaiyanmofrad:2015:UFF,
  author =       "Abbas Banaiyanmofrad and Houman Homayoun and Nikil
                 Dutt",
  title =        "Using a Flexible Fault-Tolerant Cache to Improve
                 Reliability for Ultra Low Voltage Operation",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "32:1--32:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629566",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Caches are known to consume a large part of total
                 microprocessor power. Traditionally, voltage scaling
                 has been used to reduce both dynamic and leakage power
                 in caches. However, aggressive voltage reduction causes
                 process-variation--induced failures in cache SRAM
                 arrays, which compromise cache reliability. In this
                 article, we propose FFT-Cache, a flexible
                 fault-tolerant cache that uses a flexible defect map to
                 configure its architecture to achieve significant
                 reduction in energy consumption through aggressive
                 voltage scaling while maintaining high error
                 reliability. FFT-Cache uses a portion of faulty cache
                 blocks as redundancy---using block-level or line-level
                 replication within or between sets---to tolerate other
                 faulty caches lines and blocks. Our configuration
                 algorithm categorizes the cache lines based on degree
                 of conflict between their blocks to reduce the
                 granularity of redundancy replacement. FFT-Cache
                 thereby sacrifices a minimal number of cache lines to
                 avoid impacting performance while tolerating the
                 maximum amount of defects. Our experimental results on
                 a processor executing SPEC2K benchmarks demonstrate
                 that the operational voltage of both L1/L2 caches can
                 be reduced down to 375 mV, which achieves up to 80\%
                 reduction in the dynamic power and up to 48\% reduction
                 in the leakage power. This comes with only a small
                 performance loss ({$<$}5\%) and 13\% area overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Owaida:2015:EDS,
  author =       "Muhsen Owaida and Gabriel Falcao and Joao Andrade and
                 Christos Antonopoulos and Nikolaos Bellas and Madhura
                 Purnaprajna and David Novo and Georgios Karakonstantis
                 and Andreas Burg and Paolo Ienne",
  title =        "Enhancing Design Space Exploration by Extending
                 {CPU\slash GPU} Specifications onto {FPGAs}",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2656207",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The design cycle for complex special-purpose computing
                 systems is extremely costly and time-consuming. It
                 involves a multiparametric design space exploration for
                 optimization, followed by design verification.
                 Designers of special purpose VLSI implementations often
                 need to explore parameters, such as optimal bitwidth
                 and data representation, through time-consuming Monte
                 Carlo simulations. A prominent example of this
                 simulation-based exploration process is the design of
                 decoders for error correcting systems, such as the
                 Low-Density Parity-Check (LDPC) codes adopted by modern
                 communication standards, which involves thousands of
                 Monte Carlo runs for each design point. Currently,
                 high-performance computing offers a wide set of
                 acceleration options that range from multicore CPUs to
                 Graphics Processing Units (GPUs) and Field Programmable
                 Gate Arrays (FPGAs). The exploitation of diverse target
                 architectures is typically associated with developing
                 multiple code versions, often using distinct
                 programming paradigms. In this context, we evaluate the
                 concept of retargeting a single OpenCL program to
                 multiple platforms, thereby significantly reducing
                 design time. A single OpenCL-based parallel kernel is
                 used without modifications or code tuning on multicore
                 CPUs, GPUs, and FPGAs. We use SOpenCL (Silicon to
                 OpenCL), a tool that automatically converts OpenCL
                 kernels to RTL in order to introduce FPGAs as a
                 potential platform to efficiently execute simulations
                 coded in OpenCL. We use LDPC decoding simulations as a
                 case study. Experimental results were obtained by
                 testing a variety of regular and irregular LDPC codes
                 that range from short/medium (e.g., 8,000 bit) to long
                 length (e.g., 64,800 bit) DVB-S2 codes. We observe
                 that, depending on the design parameters to be
                 simulated, on the dimension and phase of the design,
                 the GPU or FPGA may suit different purposes more
                 conveniently, thus providing different acceleration
                 factors over conventional multicore CPUs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2015:TWA,
  author =       "Tianzheng Wang and Duo Liu and Yi Wang and Zili Shao",
  title =        "Towards Write-Activity-Aware Page Table Management for
                 Non-volatile Main Memories",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "34:1--34:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2697394",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Non-volatile memories such as phase change memory
                 (PCM) and memristor are being actively studied as an
                 alternative to DRAM-based main memory in embedded
                 systems because of their properties, which include low
                 power consumption and high density. Though PCM is one
                 of the most promising candidates with commercial
                 products available, its adoption has been greatly
                 compromised by limited write endurance. As main memory
                 is one of the most heavily accessed components, it is
                 critical to prolong the lifetime of PCM. In this
                 article, we present {Write-Activity-aware Page Table
                 Management} (WAPTM), a simple yet effective page table
                 management scheme for reducing unnecessary writes, by
                 redesigning system software and exploiting
                 write-activity-aware features provided by the hardware.
                 We implemented WAPTM in Google Android based on the ARM
                 architecture and evaluated it with real Android
                 applications. Experimental results show that WAPTM can
                 significantly reduce writes in page tables, proving the
                 feasibility and potential of prolonging the lifetime of
                 PCM-based main memory through reducing writes at the OS
                 level.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tsai:2015:JPI,
  author =       "Chun-Jen Tsai and Han-Wen Kuo and Zigang Lin and
                 Zi-Jing Guo and Jun-Fu Wang",
  title =        "A {Java} Processor {IP} Design for Embedded {SoC}",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "35:1--35:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629649",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/java2000.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present a reusable Java processor
                 IP for application processors of embedded systems. For
                 the Java microarchitecture, we propose a low-cost stack
                 memory design that supports a two-fold instruction
                 folding pipeline and a low-complexity Java exception
                 handling hardware. We also propose a mapping between
                 the Java dynamic class loading model and the SoC
                 platform-based design principle so that the Java core
                 can be encapsulated as a reusable IP. To achieve this
                 goal, a two-level method area with two on-chip circular
                 buffers is proposed as an interface between the RISC
                 core and the Java core. The proposed architecture is
                 implemented on a Xilinx Virtex-5 FPGA device.
                 Experimental results show that its performance has some
                 advantages over other Java processors and a Java VM
                 with JIT acceleration on a PowerPC platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ttofis:2015:HEA,
  author =       "Christos Ttofis and Christos Kyrkou and Theocharis
                 Theocharides",
  title =        "A Hardware-Efficient Architecture for Accurate
                 Real-Time Disparity Map Estimation",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "36:1--36:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629699",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Emerging embedded vision systems utilize disparity
                 estimation as a means to perceive depth information to
                 intelligently interact with their host environment and
                 take appropriate actions. Such systems demand high
                 processing performance and accurate depth perception
                 while requiring low energy consumption, especially when
                 dealing with mobile and embedded applications, such as
                 robotics, navigation, and security. The majority of
                 real-time dedicated hardware implementations of
                 disparity estimation systems have adopted local
                 algorithms relying on simple cost aggregation
                 strategies with fixed and rectangular correlation
                 windows. However, such algorithms generally suffer from
                 significant ambiguity along depth borders and areas
                 with low texture. To this end, this article presents
                 the hardware architecture of a disparity estimation
                 system that enables good performance in both accuracy
                 and speed. The architecture implements an adaptive
                 support weight stereo correspondence algorithm that
                 integrates image segmentation information in an attempt
                 to increase the robustness of the matching process. The
                 article also presents hardware-oriented algorithmic
                 modifications/optimization techniques that make the
                 algorithm hardware-friendly and suitable for efficient
                 dedicated hardware implementation. A comparison to the
                 literature asserts that an FPGA implementation of the
                 proposed architecture is among the fastest
                 implementations in terms of million disparity
                 estimations per second (MDE/s), and with an overall
                 accuracy of 90.21\%, it presents an effective
                 processing speed/disparity map accuracy trade-off.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Peon-quiros:2015:PLD,
  author =       "Miguel Pe{\'o}n-Quir{\'o}s and Alexandros Bartzas and
                 Stylianos Mamagkakis and Francky Catthoor and Jos{\'e}
                 Manuel Mend{\'\i}as and Dimitrios Soudris",
  title =        "Placement of Linked Dynamic Data Structures over
                 Heterogeneous Memories in Embedded Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "37:1--37:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2656208",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Software applications use dynamic memory (allocated
                 and deallocated in the system's heap) to handle
                 dynamism in their working conditions. Embedded systems
                 tend to include complex memory organizations but most
                 techniques for dynamic memory management do not deal
                 with the placement of data objects in physical memory
                 modules. Additionally, the performance of
                 hardware-controlled cache memories may be severely
                 hindered when used with linked data structures. We
                 therefore present a methodology to map dynamic data on
                 the multilevel memory subsystem of embedded systems,
                 taking advantage of any available memories (e.g.,
                 on-chip SRAMs) and avoiding interference with the cache
                 memories. The resulting data placement uses an
                 exclusive memory model and is compatible with existing
                 techniques for managing static data. Our methodology
                 helps the designer achieve reductions in energy
                 consumption and execution time that can be obtained by
                 an expert in an automated way while keeping control
                 over the process through multiple configuration
                 knobs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Segarra:2015:ASP,
  author =       "Juan Segarra and Clemente Rodr{\'\i}guez and Rub{\'e}n
                 Gran and Luis C. Aparicio and V{\'\i}ctor Vi{\~n}als",
  title =        "{ACDC}: Small, Predictable and High-Performance Data
                 Cache",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "38:1--38:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2677093",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In multitasking real-time systems, the worst-case
                 execution time (WCET) of each task and also the effects
                 of interferences between tasks in the worst-case
                 scenario need to be calculated. This is especially
                 complex in the presence of data caches. In this
                 article, we propose a small instruction-driven data
                 cache (256 bytes) that effectively exploits locality.
                 It works by preselecting a subset of memory
                 instructions that will have data cache replacement
                 permission. Selection of such instructions is based on
                 data reuse theory. Since each selected memory
                 instruction replaces its own data cache line, it
                 prevents pollution and performance in tasks becomes
                 independent of the size of the associated data
                 structures. We have modeled several memory
                 configurations using the Lock-MS WCET analysis method.
                 Our results show that, on average, our data cache
                 effectively services 88\% of program data of the tested
                 benchmarks. Such results double the worst-case
                 performance of our tested multitasking experiments. In
                 addition, in the worst case, they reach between 75\%
                 and 89\% of the ideal case of always hitting in
                 instruction and data caches. As well, we show that
                 using partitioning on our proposed hardware only
                 provides marginal benefits in worst-case performance,
                 so using partitioning is discouraged. Finally, we study
                 the viability of our proposal in the MiBench
                 application suite by characterizing its data reuse,
                 achieving hit ratios beyond 90\% in most programs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bellasi:2015:ERR,
  author =       "Patrick Bellasi and Giuseppe Massari and William
                 Fornaciari",
  title =        "Effective Runtime Resource Management Using {Linux}
                 Control Groups with the {BarbequeRTRM} Framework",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "39:1--39:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2658990",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/linux.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/unix.bib",
  abstract =     "The extremely high technology process reached by
                 silicon manufacturing (smaller than 32nm) has led to
                 production of computational platforms and SoC,
                 featuring a considerable amount of resources. Whereas
                 from one side such multi- and many-core platforms show
                 growing performance capabilities, from the other side
                 they are more and more affected by power, thermal, and
                 reliability issues. Moreover, the increased
                 computational capabilities allows congested usage
                 scenarios with workloads subject to mixed and
                 time-varying requirements. Effective usage of the
                 resources should take into account both the application
                 requirements and resources availability, with an
                 arbiter, namely a resource manager in charge to solve
                 the resource contention among demanding applications.
                 Current operating systems (OS) have only a limited
                 knowledge about application-specific behaviors and
                 their time-varying requirements. Dedicated system
                 interfaces to collect such inputs and forward them to
                 the OS (e.g., its scheduler) are thus an interesting
                 research area that aims at integrating the OS with an
                 ad hoc resource manager. Such a component can exploit
                 efficient low-level OS interfaces and mechanisms to
                 extend its capabilities of controlling tasks and system
                 resources. Because of the specific tasks and timings of
                 a resource manager, this component can be easily and
                 effectively developed as a user-space extension lying
                 in between the OS and the controlled application. This
                 article, which focuses on multicore Linux systems,
                 shows a portable solution to enforce runtime resource
                 management decisions based on the standard control
                 groups framework. A burst and a mixed workload
                 analysis, performed on a multicore-based NUMA platform,
                 have reported some promising results both in terms of
                 performance and power saving.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schaumont:2015:IEP,
  author =       "Patrick Schaumont and M{\'a}ire O'Neill and Tim
                 G{\"u}neysu",
  title =        "Introduction for Embedded Platforms for Cryptography
                 in the Coming Decade",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "40:1--40:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2745710",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2015:ESD,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Schizoid Design for Critical Embedded
                 Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "40e:1--40e:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2761728",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Howe:2015:PLB,
  author =       "James Howe and Thomas P{\"o}ppelmann and M{\'a}ire
                 O'Neill and Elizabeth O'Sullivan and Tim G{\"u}neysu",
  title =        "Practical Lattice-Based Digital Signature Schemes",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "41:1--41:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2724713",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Digital signatures are an important primitive for
                 building secure systems and are used in most real-world
                 security protocols. However, almost all popular
                 signature schemes are either based on the factoring
                 assumption (RSA) or the hardness of the discrete
                 logarithm problem (DSA/ECDSA). In the case of classical
                 cryptanalytic advances or progress on the development
                 of quantum computers, the hardness of these closely
                 related problems might be seriously weakened. A
                 potential alternative approach is the construction of
                 signature schemes based on the hardness of certain
                 lattice problems that are assumed to be intractable by
                 quantum computers. Due to significant research
                 advancements in recent years, lattice-based schemes
                 have now become practical and appear to be a very
                 viable alternative to number-theoretic cryptography. In
                 this article, we focus on recent developments and the
                 current state of the art in lattice-based digital
                 signatures and provide a comprehensive survey
                 discussing signature schemes with respect to
                 practicality. Additionally, we discuss future research
                 areas that are essential for the continued development
                 of lattice-based cryptography.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Boorghany:2015:CIL,
  author =       "Ahmad Boorghany and Siavash {Bayat Sarmadi} and Rasool
                 Jalili",
  title =        "On Constrained Implementation of Lattice-Based
                 Cryptographic Primitives and Schemes on Smart Cards",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "42:1--42:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700078",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most lattice-based cryptographic schemes with a
                 security proof suffer from large key sizes and heavy
                 computations. This is also true for the simpler case of
                 authentication protocols that are used on smart cards
                 as a very-constrained computing environment. Recent
                 progress on ideal lattices has significantly improved
                 the efficiency and made it possible to implement
                 practical lattice-based cryptography on constrained
                 devices. However, to the best of our knowledge, no
                 previous attempts have been made to implement
                 lattice-based schemes on smart cards. In this article,
                 we provide the results of our implementation of several
                 state-of-the-art lattice-based authentication protocols
                 on smart cards and a microcontroller widely used in
                 smart cards. Our results show that only a few of the
                 proposed lattice-based authentication protocols can be
                 implemented using limited resources of such constrained
                 devices; however, cutting-edge ones are suitably
                 efficient to be used practically on smart cards.
                 Moreover, we have implemented fast Fourier transform
                 (FFT) and discrete Gaussian sampling with different
                 typical parameter sets, as well as versatile
                 lattice-based public-key encryptions. These results
                 have noticeable points that help to design or optimize
                 lattice-based schemes for constrained devices.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Aysu:2015:FRT,
  author =       "Aydin Aysu and Bilgiday Yuce and Patrick Schaumont",
  title =        "The Future of Real-Time Security: Latency-Optimized
                 Lattice-Based Digital Signatures",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "43:1--43:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2724714",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Advances in quantum computing have spurred a
                 significant amount of research into public-key
                 cryptographic algorithms that are resistant against
                 postquantum cryptanalysis. Lattice-based cryptography
                 is one of the important candidates because of its
                 reasonable complexity combined with reasonable
                 signature sizes. However, in a postquantum world, not
                 only the cryptography will change but also the
                 computing platforms. Large amounts of
                 resource-constrained embedded systems will connect to a
                 cloud of powerful server computers. We present an
                 optimization technique for lattice-based signature
                 generation on such embedded systems; our goal is to
                 optimize latency rather than throughput. Indeed, on an
                 embedded system, the latency of a single signature for
                 user identification or message authentication is more
                 important than the aggregate signature generation rate.
                 We build a high-performance implementation using
                 hardware\slash software codesign techniques. The key
                 idea is to partition the signature generation scheme
                 into offline and online phases. The signature scheme
                 allows this separation because a large portion of the
                 computation does not depend on the message to be signed
                 and can be handled before the message is given. Then,
                 we can map complex precomputation operations in
                 software on a low-cost processor and utilize hardware
                 resources to accelerate simpler online operations. To
                 find the optimum hardware architecture for the target
                 platform, we define and explore the design space and
                 implement two design configurations. We realize our
                 solutions on the Altera Cyclone-IV CGX150 FPGA. The
                 implementation consists of a NIOS soft-core processor
                 and a low-latency hash and polynomial multiplication
                 engine. On average, the proposed low-latency
                 architecture can generate a signature with a latency of
                 96 clock cycles at 40MHz, resulting in a response time
                 of 2.4 $ \mu $ s for a signing request. On equivalent
                 platforms, this corresponds to a performance
                 improvement of 33 and 105 times compared to previous
                 hardware and software implementations, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{VonMaurich:2015:IQM,
  author =       "Ingo {Von Maurich} and Tobias Oder and Tim
                 G{\"u}neysu",
  title =        "Implementing {QC--MDPC} {McEliece} Encryption",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "44:1--44:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700102",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With respect to performance, asymmetric code-based
                 cryptography based on binary Goppa codes has been
                 reported as a highly interesting alternative to RSA and
                 ECC. A major drawback is still the large keys in the
                 range between 50 and 100KB that prevented real-world
                 applications of code-based cryptosystems so far. A
                 recent proposal by Misoczki et al. showed that
                 quasi-cyclic moderate-density parity-check (QC-MDPC)
                 codes can be used in McEliece encryption, reducing the
                 public key to just 0.6KB to achieve an 80-bit security
                 level. In this article, we provide optimized decoding
                 techniques for MDPC codes and survey several efficient
                 implementations of the QC-MDPC McEliece cryptosystem.
                 This includes high-speed and lightweight architectures
                 for reconfigurable hardware, efficient coding styles
                 for ARM's Cortex-M4 microcontroller, and novel
                 high-performance software implementations that fully
                 employ vector instructions. Finally, we conclude that
                 McEliece encryption in combination with QC-MDPC codes
                 not only enables high-performance implementations but
                 also allows for lightweight designs on a wide range of
                 different platforms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Massolino:2015:OSC,
  author =       "Pedro Maat C. Massolino and Paulo S. L. M. Barreto and
                 Wilson V. Ruggiero",
  title =        "Optimized and Scalable Co-Processor for {McEliece}
                 with Binary {Goppa} Codes",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "45:1--45:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2736284",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Asymmetric cryptographic primitives are essential to
                 enable secure communications in public networks or
                 public mediums. Such primitives can be deployed as
                 software libraries or hardware co-processors, the
                 latter being more commonly employed in systems on chip
                 (SoC) scenarios, embedded devices, or
                 application-specific servers. Unfortunately, the most
                 commonly available solutions, based on RSA or elliptic
                 curve cryptography (ECC), are highly processing
                 intensive due to the underlying extended-precision
                 modular arithmetic. Consequently, they are not
                 available on highly constrained platforms. Aiming to
                 tackle this issue, we here investigate an alternative
                 asymmetric encryption scheme that relies on lightweight
                 arithmetic: McEliece. This scheme is especially
                 appealing because, being based on error correction
                 codes, it displays a simpler arithmetic and leads to
                 better performance when compared to RSA or ECC. To
                 evaluate the implementation of this scheme in hardware,
                 we propose and analyze a flexible architecture whose
                 security level and time versus area usage
                 characteristics can be reconfigured as desired. The
                 proposed architecture is suitable to all usual security
                 levels, ranging from 80 to 256 bits. It is also very
                 efficient, being able to perform data decryption with
                 binary Goppa codes in 56$ \mu $ s with 3,402 slices on
                 a Xilinx Spartan-3AN FPGA, whereas the best-known
                 result in the literature for the same FPGA is 115$ \mu
                 $ s with 7,331 slices. Alternatively, the architecture
                 can operate with quasi-dyadic Goppa (QD-Goppa) codes,
                 which involves smaller keys than traditional binary
                 Goppa codes. In the latter case, for an 80-bit security
                 level, the decryption operation can take from 1.1ms
                 with 1,129 slices to 68$ \mu $ s with 8,268 slices. By
                 choosing a more hardware-friendly decoding algorithm,
                 focusing hardware resources on most bottleneck
                 operations and sharing hardware resource for two
                 different algorithms, better results than those in
                 the literature were obtained.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Millo:2015:MAD,
  author =       "Jean-Vivien Millo and Emilien Kofman and Robert {De
                 Simone}",
  title =        "Modeling and Analyzing Dataflow Applications on
                 {NoC}-Based Many-Core Architectures",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "46:1--46:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700081",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The advent of chip-level parallel architectures
                 prompted a renewal of interest into dataflow process
                 networks. The trend is to model an application
                 independently from the architecture, then the model is
                 morphed to best fit the target architecture. One
                 downplayed aspect is the mapping of communications
                 through the on-chip topology. The cost of such
                 communications is often prevalent with regard to
                 computations. This article establishes a dataflow
                 process network called K-periodically Routed Graph
                 (KRG), which serves the role of representing the
                 various routing decisions during the transformation of
                 a genuine application into an architecture-aware version
                 for this application.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Davis:2015:GPM,
  author =       "Robert I. Davis and Alan Burns and Jose Marinho and
                 Vincent Nelis and Stefan M. Petters and Marko
                 Bertogna",
  title =        "Global and Partitioned Multiprocessor Fixed Priority
                 Scheduling with Deferred Preemption",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "47:1--47:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2739954",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article introduces schedulability analysis for
                 Global Fixed Priority Scheduling with Deferred
                 Preemption (gFPDS) for homogeneous multiprocessor
                 systems. gFPDS is a superset of Global Fixed Priority
                 Preemptive Scheduling (gFPPS) and Global Fixed Priority
                 Nonpreemptive Scheduling (gFPNS). We show how
                 schedulability can be improved using gFPDS via
                 appropriate choice of priority assignment and final
                 nonpreemptive region lengths, and provide algorithms
                 that optimize schedulability in this way. Via an
                 experimental evaluation we compare the performance of
                 multiprocessor scheduling using global approaches:
                 gFPDS, gFPPS, and gFPNS, and also partitioned
                 approaches employing FPDS, FPPS, and FPNS on each
                 processor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tilli:2015:GCR,
  author =       "Andrea Tilli and Andrea Bartolini and Matteo Cacciari
                 and Luca Benini",
  title =        "Guaranteed Computational Resprinting via
                 Model-Predictive Control",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "48:1--48:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2724715",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Today's and future many-core systems are facing the
                 utilization wall and dark silicon problems, for which
                 not all the processing engines can be powered at the
                 same time as this will lead to a power consumption
                 higher than the Total Design Power (TDP) budget.
                 Recently, computational sprinting approaches addressed
                 the problem by exploiting the intrinsic thermal
                 capacitance of the chip and the properties of common
                 applications, which require intense, but temporary, use
                 of resources. The thermal capacitance, possibly
                 augmented with phase change materials, enables the
                 temporary activation of all the resources
                 simultaneously, although they largely exceed the
                 steady-state thermal design power. In this article, we
                 present an innovative and low-overhead hierarchical
                 model-predictive controller for managing thermally safe
                 sprinting with predictable resprinting rate, which
                 ensures the correct execution of mixed-criticality
                 tasks. Well-targeted simulations, also based on real
                 workload benchmarks, show the applicability and the
                 effectiveness of our solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sayyah:2015:VPB,
  author =       "Parinaz Sayyah and Mihai T. Lazarescu and Sara Bocchio
                 and Emad Ebeid and Gianluca Palermo and Davide Quaglia
                 and Alberto Rosti and Luciano Lavagno",
  title =        "Virtual Platform-Based Design Space Exploration of
                 Power-Efficient Distributed Embedded Applications",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "49:1--49:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2723161",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Networked embedded systems are essential building
                 blocks of a broad variety of distributed applications
                 ranging from agriculture to industrial automation to
                 healthcare and more. These often require specific
                 energy optimizations to increase the battery lifetime
                 or to operate using energy harvested from the
                 environment. Since a dominant portion of power
                 consumption is determined and managed by software, the
                 software development process must have access to the
                 sophisticated power management mechanisms provided by
                 state-of-the-art hardware platforms to achieve the best
                 tradeoff between system availability and reactivity.
                 Furthermore, internode communications must be
                 considered to properly assess the energy consumption.
                 This article describes a design flow based on a SystemC
                 virtual platform including both accurate power models
                 of the hardware components and a fast abstract model of
                 the wireless network. The platform allows both
                 model-driven design of the application and the
                 exploration of power and network management
                 alternatives. These can be evaluated in different
                 network scenarios, allowing one to exploit power
                 optimization strategies without requiring expensive
                 field trials. The effectiveness of the approach is
                 demonstrated via experiments on a wireless body area
                 network application.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tamas-Selicean:2015:DOM,
  author =       "Domitian Tamas-Selicean and Paul Pop",
  title =        "Design Optimization of Mixed-Criticality Real-Time
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "50:1--50:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700103",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we are interested in implementing
                 mixed-criticality real-time embedded applications on a
                 given heterogeneous distributed architecture.
                 Applications have different criticality levels,
                 captured by their Safety-Integrity Level (SIL), and are
                 scheduled using static-cyclic scheduling. According to
                 certification standards, mixed-criticality tasks can be
                 integrated onto the same architecture only if there is
                 enough spatial and temporal separation among them. We
                 consider that the separation is provided by
                 partitioning, such that applications run in separate
                 partitions, and each partition is allocated several
                 time slots on a processor. Tasks of different SILs can
                 share a partition only if they are all elevated to the
                 highest SIL among them. Such elevation leads to
                 increased development costs, which increase
                 dramatically with each SIL. Tasks of higher SILs can be
                 decomposed into redundant structures of lower SIL
                 tasks. We are interested to determine (i) the mapping
                 of tasks to processors, (ii) the assignment of tasks to
                 partitions, (iii) the decomposition of tasks into
                 redundant lower SIL tasks, (iv) the sequence and size
                 of the partition time slots on each processor, and (v)
                 the schedule tables, such that all the applications are
                 schedulable and the development costs are minimized. We
                 have proposed a Tabu Search-based approach to solve
                 this optimization problem. The proposed algorithm has
                 been evaluated using several synthetic and real-life
                 benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Papagiannopoulou:2015:EEH,
  author =       "Dimitra Papagiannopoulou and Giuseppe Capodanno and
                 Tali Moreshet and Maurice Herlihy and R. Iris Bahar",
  title =        "Energy-Efficient and High-Performance Lock Speculation
                 Hardware for Embedded Multicore Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "51:1--51:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700097",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems are becoming increasingly common in
                 everyday life and like their general-purpose
                 counterparts, they have shifted towards shared memory
                 multicore architectures. However, they are much more
                 resource constrained, and as they often run on
                 batteries, energy efficiency becomes critically
                 important. In such systems, achieving high concurrency
                 is a key demand for delivering satisfactory performance
                 at low energy cost. In order to achieve this high
                 concurrency, consistency across the shared memory
                 hierarchy must be accomplished in a cost-effective
                 manner in terms of performance, energy, and
                 implementation complexity. In this article, we propose
                 Embedded-Spec, a hardware solution for supporting
                 transparent lock speculation, without the requirement
                 for special supporting instructions. Using this
                 approach, we evaluate the energy consumption and
                 performance of a suite of benchmarks, exploring a range
                 of contention management and retry policies. We
                 conclude that for resource-constrained platforms, lock
                 speculation can provide real benefits in terms of
                 improved concurrency and energy efficiency, as long as
                 the underlying hardware support is carefully
                 configured.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Santinelli:2015:PCP,
  author =       "Luca Santinelli and Liliana Cucu-Grosjean",
  title =        "A Probabilistic Calculus for Probabilistic Real-Time
                 Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "52:1--52:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2717113",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Challenges within real-time research are mostly in
                 terms of modeling and analyzing the complexity of
                 actual real-time embedded systems. Probabilities are
                 effective in both modeling and analyzing embedded
                 systems by increasing the amount of information for the
                 description of elements composing the system. Elements
                 are tasks and applications that need resources,
                 schedulers that execute tasks, and resource
                 provisioning that satisfies the resource demand. In
                 this work, we present a model that considers
                 component-based real-time systems with component
                 interfaces able to abstract both the functional and
                 nonfunctional requirements of components and the
                 system. Our model faces probabilities and probabilistic
                 real-time systems unifying in the same framework
                 probabilistic scheduling techniques and compositional
                 guarantees varying from soft to hard real time. We
                 provide an algebra to work with the probabilistic
                 notation developed and form an analysis in terms of
                 sufficient probabilistic schedulability conditions for
                 task systems with either preemptive fixed-priority or
                 earliest deadline first scheduling paradigms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "52",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Anand:2015:ICL,
  author =       "Kapil Anand and Rajeev Barua",
  title =        "Instruction-Cache Locking for Improving Embedded
                 Systems Performance",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "53:1--53:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700100",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cache memories in embedded systems play an important
                 role in reducing the execution time of applications.
                 Various kinds of extensions have been added to cache
                 hardware to enable software involvement in replacement
                 decisions, improving the runtime over a purely
                 hardware-managed cache. Novel embedded systems, such as
                 Intel's XScale and ARM Cortex processors, facilitate
                 locking one or more lines in cache; this feature is
                 called cache locking. We present a method for
                 instruction-cache locking that is able to reduce the
                 average-case runtime of a program. We demonstrate that
                 the optimal solution for instruction cache locking can
                 be obtained in polynomial time. However, a fundamental
                 lack of correlation between cache hardware and software
                 program points renders such optimal solutions
                 impractical. Instead, we propose two practical
                 heuristics-based approaches to achieve cache locking.
                 First, we present a static mechanism for locking the
                 cache, in which the locked contents of the cache are
                 kept fixed over the execution of the program. Next, we
                 present a dynamic mechanism that accounts for changing
                 program requirements at runtime. We devise a
                 cost--benefit model to discover the memory addresses
                 that should be locked in the cache. We implement our
                 scheme inside a binary rewriter, widening the
                 applicability of our scheme to binaries compiled using
                 any compiler. Results obtained on a suite of MiBench
                 benchmarks show that our static mechanism results in
                 20\% improvement in the instruction-cache miss rate on
                 average and up to 18\% improvement in the execution
                 time on average for applications having instruction
                 accesses as a bottleneck, compared to no cache locking.
                 The dynamic mechanism improves the cache miss rate by
                 35\% on average and execution time by 32\% on
                 instruction-cache-constrained applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "53",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cooke:2015:FSM,
  author =       "Patrick Cooke and Lu Hao and Greg Stitt",
  title =        "Finite-State-Machine Overlay Architectures for Fast
                 {FPGA} Compilation and Application Portability",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "54:1--54:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700082",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Despite significant advantages, wider usage of
                 field-programmable gate arrays (FPGAs) has been limited
                 by lengthy compilation and a lack of portability.
                 Virtual-architecture overlays have partially addressed
                 these problems, but previous work focuses mainly on
                 heavily pipelined applications with minimal control
                 requirements. We expand previous work by enabling more
                 flexible control via overlay architectures for
                 finite-state machines. Although not appropriate for
                 control-intensive circuits, the presented architectures
                 reduced compilation times of control changes in a
                 convolution case study from 7 hours to less than 1
                 second, with no performance overhead and an area
                 overhead of 0.2\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Watkins:2015:UNT,
  author =       "Lanier Watkins and William H. Robinson and Raheem
                 Beyah",
  title =        "Using Network Traffic to Infer Hardware State: a
                 Kernel-Level Investigation",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "55:1--55:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700094",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we illustrate that the boundary of a
                 general-purpose node can be extended into the network
                 by extracting information from network traffic
                 generated by that general-purpose node to infer the
                 state of its hardware components. This information is
                 represented in a delay signature latent within the
                 network traffic. In contrast, the traditional approach
                 to determine the internal state of a node's resources
                 meant that a software application with internal
                 processes had to be resident on the node. The
                 aforementioned delay signature is the keystone that
                 provides a correlation between network traffic and the
                 internal state of the source node. We characterize this
                 delay signature by (1) identifying the different types
                 of assembly language instructions that source this
                 delay and (2) describing how architectural techniques,
                 such as instruction pipelining and caching, give rise
                 to this delay signature. In theory, highly utilized
                 nodes (due to multiple threads) will contain excessive
                 context switching and contention for shared resources.
                 One important shared resource is main memory, and
                 excessive use of this resource by applications and
                 internal processes eventually leads to a decrease in
                 cache efficiency that eventually stalls the instruction
                 pipeline. Our results support this theory;
                 specifically, we have observed that excessive context
                 switching in active applications increases the
                 effective memory access time and wastes precious CPU
                 cycles, thus adding additional delay to the execution
                 of load, store, and other instructions. Because the
                 operating system (OS) kernel accesses memory to send
                 network packets, the delay signature is induced into
                 network traffic in situations where user-level
                 utilization is high. We demonstrate this theory in two
                 case studies: (1) resource discovery in cluster grids
                 and (2) network-based detection of bitcoin mining on
                 compromised nodes.",
  acknowledgement = ack-nhfb,
  acmid =        "2700094",
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  keywords =     "LEON4 processor, clusters assembly language
                 instructions, grid computing, passive resource
                 discovery",
  pagecount =    "22",
}

@Article{Kerrison:2015:EMS,
  author =       "Steve Kerrison and Kerstin Eder",
  title =        "Energy Modeling of Software for a Hardware
                 Multithreaded Embedded Microprocessor",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "56:1--56:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700104",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article examines a hardware multithreaded
                 microprocessor and discusses the impact such an
                 architecture has on existing software energy modeling
                 techniques. A framework is constructed for analyzing
                 the energy behavior of the XMOS XS1-L multithreaded
                 processor and a variation on existing software energy
                 models is proposed, based on analysis of collected
                 energy data. It is shown that by combining execution
                 statistics with sufficient data on the processor's
                 thread activity and instruction execution costs, a
                 multithreaded software energy model used with
                 Instruction Set Simulation can yield an average error
                 margin of less than 7\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cilardo:2015:ECA,
  author =       "Alessandro Cilardo and Edoardo Fusella and Luca Gallo
                 and Antonino Mazzeo",
  title =        "Exploiting Concurrency for the Automated Synthesis of
                 {MPSoC} Interconnects",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "57:1--57:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700075",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multiprocessor Systems-on-Chip (MPSoC) applications
                 can rely today on a very large spectrum of
                 interconnection topologies potentially meeting given
                 communication requirements, determining various
                 trade-offs between cost and performance. Building
                 interconnects that enable concurrent communication
                 tasks introduces decisive opportunities for reducing
                 the overall communication latency. This work identifies
                 three levels of parallelism at the interconnect level:
                 global parallelism across different independent
                 domains; local or intradomain parallelism, relying on
                 inherently concurrent interconnect components such as
                 crossbars; and interdomain parallelism, where multiple
                 concurrent paths across different local domains are
                 exploited. We propose an automated methodology to
                 search the design space, aimed at maximizing the
                 exploitation of these forms of parallelism. The
                 approach also takes into consideration possible
                 dependencies between communication tasks, which further
                 constrains the design space, making the identification
                 of a feasible solution more challenging. By jointly
                 solving a scheduling and interconnect synthesis
                 problem, the methodology turns the description of the
                 application communication requirements, including data
                 dependencies, into an on-chip synthesizable
                 interconnection structure along with a communication
                 schedule satisfying given area constraints. The article
                 thoroughly describes the formalisms and the methodology
                 used to derive such optimized heterogeneous topologies.
                 It also discusses some case studies emphasizing the
                 impact of the proposed approach and highlighting the
                 essential differences with a few other solutions
                 presented in the technical literature.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Geeraerts:2015:VCA,
  author =       "Gilles Geeraerts and Alexander Heu{\ss}ner and
                 Jean-Fran{\c{c}}ois Raskin",
  title =        "On the Verification of Concurrent, Asynchronous
                 Programs with Waiting Queues",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "58:1--58:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700072",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recently, new libraries, such as Grand Central
                 Dispatch (GCD), have been proposed to directly harness
                 the power of multicore platforms and to make the
                 development of concurrent software more accessible to
                 software engineers. When using such a library, the
                 programmer writes so-called blocks, which are chunks of
                 code, and dispatches them using synchronous or
                 asynchronous calls to several types of waiting queues.
                 A scheduler is then responsible for dispatching those
                 blocks among the available cores. Blocks can
                 synchronize via a global memory. In this article, we
                 propose Queue-Dispatch Asynchronous Systems as a
                 mathematical model that faithfully formalizes the
                 synchronization mechanisms and behavior of the
                 scheduler in those systems. We study in detail their
                 relationships to classical formalisms such as pushdown
                 systems, Petri nets, Fifo systems, and counter systems.
                 Our main technical contributions are precise worst-case
                 complexity results for the Parikh coverability problem
                 and the termination problem for several subclasses of
                 our model. We also consider an extension of Qdas with a
                 fork-join mechanism. Adding fork-join to any of the
                 subclasses that we have identified leads to
                 undecidability of the coverability problem. This
                 motivates the study of over-approximations. Finally, we
                 consider handmade abstractions as a practical way of
                 verifying programs that cannot be faithfully modeled by
                 decidable subclasses of Qdas.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2015:COM,
  author =       "Kai Huang and Min Yu and Rongjie Yan and Xiaomeng
                 Zhang and Xiaolang Yan and Lisane Brisolara and Ahmed
                 Amine Jerraya and Jiong Feng",
  title =        "Communication Optimizations for Multithreaded Code
                 Generation from {Simulink} Models",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "59:1--59:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2644811",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Dec 9 08:08:56 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Communication frequency is increasing with the growing
                 complexity of emerging embedded applications and the
                 number of processors in the implemented multiprocessor
                 SoC architectures. In this article, we consider the
                 issue of communication cost reduction during
                 multithreaded code generation from partitioned Simulink
                 models to help designers in code optimization to
                 improve system performance. We first propose a
                 technique combining message aggregation and
                 communication pipeline methods, which groups
                 communications with the same destinations and sources
                 and parallelizes communication and computation tasks.
                 We also present a method to apply static analysis and
                 dynamic emulation for efficient communication buffer
                 allocation to further reduce synchronization cost and
                 increase processor utilization. The existing cyclic
                 dependency in the mapped model may hinder the
                 effectiveness of the two techniques. We further propose
                 a set of optimizations involving repartition with
                 strongly connected threads to maximize the degree of
                 communication reduction and preprocessing strategies
                 with available delays in the model to reduce the number
                 of communication channels that cannot be optimized.
                 Experimental results demonstrate the advantages of the
                 proposed optimizations with 11--143\% throughput
                 improvement.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mathew:2015:NMB,
  author =       "Jimson Mathew and Rajat Subhra Chakraborty and Durga
                 Prasad Sahoo and Yuanfan Yang and Dhiraj K. Pradhan",
  title =        "A Novel Memristor-Based Hardware Security Primitive",
  journal =      j-TECS,
  volume =       "14",
  number =       "3",
  pages =        "60:1--60:??",
  month =        may,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2736285",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 17:21:32 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Memristor is an exciting new addition to the
                 repertoire of fundamental circuit elements.
                 Alternatives to many security protocols originally
                 employing traditional mathematical cryptography involve
                 novel hardware security primitives, such as Physically
                 Unclonable Functions (PUFs). In this article, we
                 propose a novel hybrid memristor-CMOS PUF circuit and
                 demonstrate its suitability through extensive
                 simulations of environmental and process variation
                 effects. The proposed PUF circuit has substantially
                 less hardware overhead than previously proposed
                 memristor-based PUF circuits while being inherently
                 resistant to machine learning-based modeling attacks
                 because of challenge-dependent delays of the memristor
                 stages. The proposed PUF can be conveniently used in
                 many security applications and protocols based on
                 hardware-intrinsic security.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "60",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2015:EBD,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Big Data, {Internet of Things},
                 Cybersecurity --- A New Trinity of Embedded Systems
                 Research",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "61:1--61:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2820608",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Barkaoui:2015:GES,
  author =       "Kamel Barkaoui and Luca Bernardinello and Andrey
                 Mokhov",
  title =        "Guest Editorial for Special Issue Application of
                 Concurrency to System Design",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "62:1--62:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2809925",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "62",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Furbach:2015:MMA,
  author =       "Florian Furbach and Roland Meyer and Klaus Schneider
                 and Maximilian Senftleben",
  title =        "Memory-Model-Aware Testing: a Unified Complexity
                 Analysis",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "63:1--63:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2753761",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "To improve the performance of the memory system,
                 multiprocessors implement weak memory consistency
                 models. Weak memory models admit different views of the
                 processes on their load and store instructions, thus
                 allowing for computations that are not sequentially
                 consistent. Program analyses have to take into account
                 the memory model of the targeted hardware. This is
                 challenging because numerous memory models have been
                 developed, and every memory model requires its own
                 analysis. In this article, we study a prominent
                 approach to program analysis: testing. The testing
                 problem takes as input sequences of operations, one for
                 each process in the concurrent program. The task is to
                 check whether these sequences can be interleaved to an
                 execution of the entire program that respects the
                 constraints of a memory model under consideration. We
                 determine the complexity of the testing problem for
                 most of the known memory models. Moreover, we study the
                 impact on the complexity of parameters, such as the
                 number of concurrent processes, the length of their
                 executions, and the number of shared variables. What
                 differentiates our contribution from related results is
                 a uniform approach that avoids considering each memory
                 model on its own. We build upon work of Steinke and
                 Nutt. They showed that the existing memory models form
                 a hierarchy where one model is called weaker than
                 another one if it includes the latter's behavior. Using
                 the Steinke-Nutt hierarchy, we develop three general
                 concepts that allow us to quickly determine the
                 complexity of a testing problem. First, we generalize
                 the technique of problem reductions from complexity
                 theory. So-called range reductions propagate hardness
                 results between memory models, and we apply them to
                 establish NP lower bounds for the stronger memory
                 models. Second, for the weaker models, we present
                 polynomial-time testing algorithms that are inspired by
                 determinization algorithms for automata. Finally, we
                 describe a single SAT encoding of the testing problem
                 that works for all memory models in the Steinke-Nutt
                 hierarchy to prove their membership in NP. Our results
                 are general enough to carry over to future weak memory
                 models. Moreover, they show that SAT solvers are
                 adequate tools for testing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "63",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Knapik:2015:ASB,
  author =       "Michal Knapik and Artur Meski and Wojciech Penczek",
  title =        "Action Synthesis for Branching Time Logic: Theory and
                 Applications",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "64:1--64:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2746337",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The article introduces a parametric extension of
                 Action-Restricted Computation Tree Logic called
                 pmARCTL. A symbolic fixed-point algorithm providing a
                 solution to the exhaustive parameter synthesis problem
                 is proposed. The parametric approach allows for an
                 in-depth system analysis and synthesis of the correct
                 parameter values. The time complexity of the problem
                 and the algorithm is provided. An existential fragment
                 of pmARCTL (pmEARCTL) is identified, in which all of
                 the solutions can be generated from a minimal and
                 unique base. A method for computing this base using
                 symbolic methods is provided. The prototype tool
                 SPATULA implementing the algorithm is applied to the
                 analysis of three benchmarks: faulty
                 Train-Gate-Controller, Peterson's mutual exclusion
                 protocol, and a generic pipeline processing network.
                 The experimental results show efficiency and
                 scalability of our approach compared to the naive
                 solution to the problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Siirtola:2015:PMI,
  author =       "Antti Siirtola and Keijo Heljanko",
  title =        "Parametrised Modal Interface Automata",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "65:1--65:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2776892",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Interface theories (ITs) enable us to analyse the
                 compatibility of interfaces and refine them while
                 preserving their compatibility. However, most ITs are
                 for finite state interfaces, whereas computing systems
                 are often parametrised involving components, the number
                 of which cannot be fixed. We present, to our knowledge,
                 the first IT that allows us to specify a parametric
                 number of interfaces. Moreover, we provide a fully
                 algorithmic procedure, implemented in a tool, for
                 checking the compatibility of and refinement between
                 parametrised interfaces. Finally, we show that the
                 restrictions of the technique are necessary; removing
                 any of them renders the refinement checking problem
                 undecidable.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "65",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cotard:2015:SHR,
  author =       "Sylvain Cotard and Audrey Queudet and Jean-Luc
                 B{\'e}chennec and S{\'e}bastien Faucou and Yvon
                 Trinquet",
  title =        "{STM--HRT}: a Robust and Wait-Free {STM} for Hard
                 Real-Time Multicore Embedded Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "66:1--66:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2786979",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article introduces STM-HRT, a nonblocking
                 wait-free software transactional memory (STM) for hard
                 real-time (HRT) multicore embedded systems. Resource
                 access control in HRT systems is usually implemented
                 with lock-based synchronization. However, these
                 mechanisms may lead to deadlocks or starvations and do
                 not scale well with the number of cores. Most existing
                 nonblocking STM are not suitable for HRT systems,
                 because it is not possible to find an upper bound of
                 the execution time for each task. In this article, we
                 show how STM-HRT can be a robust solution for resource
                 sharing in HRT multicore systems. We provide a detailed
                 description of STM-HRT architecture. We propose a set
                 of arguments to establish the functional correctness of
                 its concurrency control protocol. Finally, as part of a
                 real-time analysis, we derive upper bounds on the
                 computations required to access shared data under
                 STM-HRT.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bujtor:2015:FSM,
  author =       "Ferenc Bujtor and Walter Vogler",
  title =        "Failure Semantics for Modal Transition Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "67:1--67:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2746336",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the aim to preserve deadlock freedom, we define a
                 new refinement preorder for modal transition systems
                 (MTSs), using an MTS-specific variant of testing
                 inspired by De Nicola and Hennessy. We characterize
                 this refinement with a kind of failure semantics and
                 show that it ``supports itself,'' for example, in the
                 sense of thoroughness---in contrast to standard modal
                 refinements. We present a conjunction operator with
                 respect to our new refinement, which is quite different
                 from existing ones. It always returns an MTS---again in
                 contrast to the case of modal refinement. Finally, we
                 also consider De Nicola's and Hennessy's may- and
                 must-testing, where the latter leads to a semantics
                 that is also compositional for hiding.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{DeGroote:2015:IAC,
  author =       "Robert {De Groote} and Philip K. F. H{\"o}lzenspies
                 and Jan Kuper and Gerard J. M. Smit",
  title =        "Incremental Analysis of Cyclo-Static Synchronous
                 Dataflow Graphs",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "68:1--68:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2792981",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present a mathematical
                 characterisation of admissible schedules of
                 cyclo-static dataflow (csdf) graphs. We demonstrate how
                 algebraic manipulation of this characterization is
                 related to unfolding csdf actors and how this
                 manipulation allows csdf graphs to be transformed into
                 mrsdf graphs that are equivalent, in the sense that
                 they admit the same set of schedules. The presented
                 transformation allows the rich set of existing analysis
                 techniques for mrsdf graphs to be applied to csdf
                 graphs and generalizes the well-known transformations
                 from csdf and mrsdf into hsdf. Moreover, it gives rise
                 to an incremental approach to the analysis of csdf
                 graphs, where approximate analyses are combined with
                 exact transformations. We show the applicability of
                 this incremental approach by demonstrating its
                 effectiveness on the problem of optimizing buffer sizes
                 under a throughput constraint.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Germanos:2015:DUW,
  author =       "Vasileios Germanos and Stefan Haar and Victor Khomenko
                 and Stefan Schwoon",
  title =        "Diagnosability under Weak Fairness",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "69:1--69:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2832910",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In partially observed Petri nets, diagnosis is the
                 task of detecting whether the given sequence of
                 observed labels indicates that some unobservable fault
                 has occurred. Diagnosability is an associated property
                 of the Petri net, stating that in any possible
                 execution, an occurrence of a fault can eventually be
                 diagnosed. In this article, we consider diagnosability
                 under the weak fairness (WF) assumption, which
                 intuitively states that no transition from a given set
                 can stay enabled forever---it must eventually either fire
                 or be disabled. We show that a previous approach to
                 WF-diagnosability in the literature has a major flaw
                 and present a corrected notion. Moreover, we present an
                 efficient method for verifying WF-diagnosability based
                 on a reduction to LTL-X model checking. An important
                 advantage of this method is that the LTL-X formula is
                 fixed---in particular, the WF assumption does not have to
                 be expressed as a part of it (which would make the
                 formula length proportional to the size of the
                 specification), but rather the ability of existing
                 model checkers to handle weak fairness directly is
                 exploited.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pan:2015:SGP,
  author =       "Gung-Yu Pan and Jed Yang and Jing-Yang Jou and
                 Bo-Cheng Charles Lai",
  title =        "Scalable Global Power Management Policy Based on
                 Combinatorial Optimization for Multiprocessors",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "70:1--70:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2811404",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multiprocessors have become the main architecture
                 trend in modern systems due to the superior
                 performance; nevertheless, the power consumption
                 remains a critical challenge. Global power management
                 (GPM) aims at dynamically finding the power state
                 combination that satisfies the power budget constraint
                 while maximizing the overall performance (or vice
                 versa). Due to the increasing number of cores in a
                 multiprocessor system, the scalability of GPM policies
                 has become critical when searching satisfactory state
                 combinations within acceptable time. This article
                 proposes a highly scalable policy based on
                 combinatorial optimization with theoretical proofs,
                 whereas previous works take exhaustive search or
                 heuristic methods. The proposed policy first applies an
                 optimum algorithm to construct a state combination
                 table in pseudo-polynomial time using dynamic
                 programming. Then, the state combination is assigned to
                 cores with minimum transition cost in linear time by
                 mapping to the network flow problem. Simulation results
                 show that the proposed policy achieves better system
                 performance for any given power budget when compared to
                 the state-of-the-art heuristic. Furthermore, the
                 proposed policy demonstrates its prominent scalability
                 with 125 times faster policy runtime for 512 cores.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lu:2015:ECA,
  author =       "Jing Lu and Ke Bai and Aviral Shrivastava",
  title =        "Efficient Code Assignment Techniques for Local Memory
                 on Software Managed Multicores",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "71:1--71:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2738039",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Scaling the memory hierarchy is a major challenge when
                 we scale the number of cores in a multicore processor.
                 Software Managed Multicore (SMM) architectures come up
                 as one of the promising solutions. In an SMM
                 architecture, there are no caches, and each core has
                 only a local scratchpad memory [Banakar et al. 2002].
                 As the local memory usually is small, large
                 applications cannot be directly executed on it. Code
                 and data of the task mapped to each core need to be
                 managed between global memory and local memory. This
                 article solves the problem of efficiently managing code
                 on an SMM architecture. The primary requirement of
                 generating efficient code assignments is a correct
                 management cost model. In this article, we address this
                 problem by proposing a cost calculation graph. In
                 addition, we develop two heuristics CMSM (Code Mapping
                 for Software Managed multicores) and CMSM\_advanced
                 that result in efficient code management execution on
                 the local scratchpad memory. Experimental results
                 collected after executing applications from the MiBench
                 suite [Guthaus et al. 2001] demonstrate that merely by
                 adopting the correct management cost calculation, even
                 using previous code assignment schemes, we can improve
                 performance by an average of 12\%. Combining the
                 correct management cost model and a more optimized code
                 mapping algorithm together, our heuristics can reduce
                 runtime in more than 80\% of the cases, and by up to
                 20\% on our set of benchmarks, compared to the
                 state-of-the-art code assignment approach [Jung et al.
                 2010]. When compared with Instruction-level Parallelism
                 (ILP) results, CMSM\_advanced performs an average of
                 5\% worse. We also simulate the benchmarks on a
                 cache-based system, and find that the code management
                 overhead on SMM core with our code management is much
                 less than memory latency of a cache-based system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kamal:2015:OHC,
  author =       "Mehdi Kamal and Ali Afzali-Kusha and Saeed Safari and
                 Massoud Pedram",
  title =        "{OPLE}: a Heuristic Custom Instruction Selection
                 Algorithm Based on Partitioning and Local Exploration
                 of Application Dataflow Graphs",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "72:1--72:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2764458",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, a heuristic custom instruction (CI)
                 selection algorithm is presented. The proposed
                 algorithm, which is called OPLE for ``Optimization
                 based on Partitioning and Local Exploration,'' uses a
                 combination of greedy and optimal optimization methods.
                 It searches for the near-optimal solution by reducing
                 the search space based on partitioning the identified
                 CI set. The partitioning of the identified set
                 guarantees the success of the algorithm independent of
                 the size of the identified set. First, the algorithm
                 finds the near-optimal CIs from the candidate CIs for
                 each part. Next, the suggested CIs from different parts
                 are combined to determine the final selected CI set. To
                 improve the set of the selected CIs, the solution is
                 evolved by calling the algorithm iteratively. The
                 efficacy of the algorithm is assessed by comparing its
                 performance to those of optimal and nonoptimal methods.
                 A comparative study is performed for a number of
                 benchmarks under different area budgets and I/O
                 constraints. The results reveal higher speedups for the
                 OPLE algorithm, especially for larger identified
                 candidate sets and/or small area budgets compared to
                 those of the nonoptimal solutions. Compared to the
                 nonoptimal techniques, the proposed algorithm provides
                 30\% higher speedup improvement on average. The maximum
                 improvement is 117\%. The results also demonstrate that
                 in many cases OPLE is able to find the optimal
                 solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Palossi:2015:CDP,
  author =       "Daniele Palossi and Martino Ruggiero and Luca Benini",
  title =        "{$3$D} {CV} Descriptor on Parallel Heterogeneous
                 Platforms",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "73:1--73:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2733377",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded three-dimensional (3D) Computer Vision (CV)
                 is considered a technology enabler for future consumer
                 applications, attracting a wide interest in academia
                 and industry. However, 3D CV processing is a
                 computation-intensive task. Its high computational cost
                 is directly related to the processing of 3D point
                 clouds, with the 3D descriptor computation representing
                 one of the main bottlenecks. Understanding the main
                 computational challenges of 3D CV applications, as well
                 as the key characteristics, enabling features, and
                 limitations of current computing platforms, is clearly
                 strategic to identify the directions of evolution for
                 future embedded processing systems targeting 3D CV. In
                 this work, an innovative and complex 3D descriptor
                 (called SHOT) has been ported on a high-end and an
                 embedded computing platform. The high-end system is
                 composed by a high-performance Intel CPU coupled with a
                 Nvidia GPU. The embedded platform is, instead, composed
                 by an ARM-based processor, coupled with the STHORM
                 accelerator. STHORM is a many-core low-power
                 accelerator developed by ST Microelectronics, featuring
                 up to 64 computational units. The SHOT descriptor has
                 been parallelized using the OpenCL programming model
                 for both platforms. Finally, we have performed an
                 in-depth performance comparison and analysis between
                 general-purpose processors and accelerators in both
                 high-end and embedded domains, discussing and
                 highlighting the main differences in the
                 Hardware/Software (HW/SW) design methodologies and
                 approaches between high-end and embedded systems
                 targeting 3D CV applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "73",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2015:CIB,
  author =       "Guohui Li and Yi Zhang and Jianjun Li",
  title =        "{Crenel}-Interval-Based Dynamic Power Management for
                 Periodic Real-Time Systems",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "74:1--74:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2744197",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In order to save the energy consumption of real-time
                 embedded systems, the integration of Dynamic Voltage
                 and Frequency Scaling (DVFS) and Device Power
                 Management (DPM) techniques has been well studied. In
                 this article, we propose a new energy management scheme
                 for periodic real-time tasks with implicit deadlines.
                 We mainly focus on the DPM part by presenting a novel
                 approach to the real-time DPM problem. Specifically, we
                 first identify intervals for each device, which we
                 refer to as Crenel Intervals, by partitioning the
                 Earliest Deadline First (EDF) schedule of the tasks
                 that need to access the device into successive
                 intervals. The principle for identifying Crenel
                 Intervals is that for each task, there is only one
                 deadline located in each Crenel Interval. Next,
                 targeting at a single device model and a multiple
                 device model, respectively, we propose the CI-EDF and
                 CI-EDF$^m$ algorithms to schedule task instances in
                 each Crenel Interval, so as to form long and continuous
                 slacks in each Crenel Interval but without jeopardizing
                 any task deadlines. Then, the slack in the Crenel
                 Intervals can be utilized to perform not only DPM, but
                 also DVFS. The experimental results show that our
                 approaches can achieve considerably more energy savings
                 than existing techniques with comparable quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "74",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mihajlovic:2015:AAR,
  author =       "Bojan Mihajlovi{\'c} and Zeljko Zili{\'c} and Warren
                 J. Gross",
  title =        "Architecture-Aware Real-Time Compression of Execution
                 Traces",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "75:1--75:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2766449",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In recent years, on-chip trace generation has been
                 recognized as a solution to the debugging of
                 increasingly complex software. An execution trace can
                 be seen as the most fundamentally useful type of trace,
                 allowing the execution path of software to be
                 determined post hoc. However, the bandwidth required to
                 output such a trace can be excessive. Our
                 architecture-aware trace compression (AATC) scheme adds
                 an on-chip branch predictor and branch target buffer to
                 reduce the volume of execution trace data in real time
                 through on-chip compression. Novel redundancy reduction
                 strategies are employed, most notably in exploiting the
                 widespread use of linked branches and the
                 compiler-driven movement of return addresses between
                 link register, stack, and program counter. In doing so,
                 the volume of branch target addresses is reduced by
                 52\%, whereas other algorithmic improvements further
                 decrease trace volume. An analysis of spatial and
                 temporal redundancy in the trace stream allows a
                 comparison of encoding strategies to be made for
                 systematically increasing compression performance. A
                 combination of differential, Fibonacci, VarLen, and
                 Move-to-Front encodings are chosen to produce two
                 compressor variants: a performance-focused xAATC that
                 encodes 56.5 instructions/bit using 24,133 gates and an
                 area-efficient fAATC that encodes 48.1 instructions/bit
                 using only 9,854 gates.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "75",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bak:2015:SPD,
  author =       "Stanley Bak and Zhenqi Huang and Fardin Abdi Taghi
                 Abad and Marco Caccamo",
  title =        "Safety and Progress for Distributed Cyber-Physical
                 Systems with Unreliable Communication",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "76:1--76:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2739046",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cyber-physical systems (CPSs) may interact and
                 manipulate objects in the physical world, and therefore
                 formal guarantees about their behavior are strongly
                 desired. Static-time proofs of safety invariants,
                 however, may be intractable for systems with
                 distributed physical-world interactions. This is
                 further complicated when realistic communication models
                 are considered, for which there may not be bounds on
                 message delays, or even when considering that messages
                 will eventually reach their destination. In this work,
                 we address the challenge of proving safety and progress
                 in distributed CPSs communicating over an unreliable
                 communication layer. We show that for this type of
                 communication model, system safety is closely related
                 to the results of a hybrid system's reachability
                 computation, which can be computed at runtime. However,
                 since computing reachability at runtime may be
                 computationally intensive, we provide an approach that
                 moves significant parts of the computation to design
                 time. This approach is demonstrated with a case study
                 of a simulation of multiple vehicles moving within a
                 shared environment.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "76",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Catania:2015:PSR,
  author =       "Vincenzo Catania and Andrea Araldo and Davide Patti",
  title =        "Parameter Space Representation of {Pareto} Front to
                 Explore Hardware--Software Dependencies",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "77:1--77:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2764457",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems design requires conflicting
                 objectives to be optimized with an appropriate choice
                 of hardware-software parameters. A simulation campaign
                 can guide the design in finding the best trade-offs,
                 but due to the big number of possible configurations,
                 it is often infeasible to simulate them all. For these
                 reasons, design space exploration algorithms aim at
                 finding near-optimal system configurations by
                 simulating only a subset of them. In this work, we
                 present PS, a new multiobjective optimization
                 algorithm, and evaluate it in the context of the
                 embedded system design. The basic idea is to recognize
                 interesting regions --- that is, regions of the
                 configuration space that provide better configurations
                 with respect to other ones. PS evaluates more
                 configurations in the interesting regions while less
                 thoroughly exploring the rest of the configuration
                 space. After a detailed formal description of the
                 algorithm and the underlying concepts, we show a case
                 study involving the hardware/software exploration of a
                 VLIW architecture. Qualitative and quantitative
                 comparisons of PS against a well-known multiobjective
                 genetic approach demonstrate that while not
                 outperforming it in terms of Pareto dominance, the
                 proposed approach can balance the uniformity and
                 granularity qualities of the solutions found, obtaining
                 more extended Pareto fronts that provide a wider view
                 of the potentiality of the designed device. Therefore,
                 PS represents a further valid choice for the designer
                 when objective constrains allow it.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "77",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Matthews:2015:PTS,
  author =       "Adam Matthews and Stanislav Bobovych and Nilanjan
                 Banerjee and James P. Parkerson and Ryan Robucci and
                 Chintan Patel",
  title =        "{Perpetuu}: a Tiered Solar-powered {GIS} Microserver",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "78:1--78:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2767128",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The aftermath of a natural disaster is characterized
                 by lack of a reliable medium for dissemination of
                 information to survivors. The state-of-the-art
                 emergency response systems rely on satellite
                 radio-enabled devices, but survivors, unlike first
                 responders, do not have access to such devices. To
                 mitigate this problem, we present perpetuu, a
                 solar-powered portable GIS microserver. The microserver
                 node can be deployed in a disaster scene and can serve
                 maps to survivors viewable on browsers of off-the-shelf
                 mobile systems. The perpetuu nodes can form a wireless
                 mesh to cover a large geographic region. A key
                 innovation in the design of the perpetuu node is a
                 tiered software and hardware architecture --- the
                 system combines a low-power micro-controller with a
                 high-power micro-processor to provide a large spectrum
                 of power states. perpetuu stays in its lowest power
                 state most of the time, and it can in-vitro detect
                 survivors using Wi-Fi sensing, and consequently wake up
                 the higher-power tier to disseminate high-resolution
                 maps on standard web browsers that provide directions
                 to safe locations. The tiered design leverages
                 hardware-assisted energy measurements and a wakeup
                 controller to balance energy harvested from solar
                 panels with energy consumed by the system. We evaluate
                 perpetuu using measurements from our prototype and
                 trace-based simulations, and show that it can function
                 near-perpetually while serving maps to a large number
                 of survivors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Medhat:2015:RMC,
  author =       "Ramy Medhat and Borzoo Bonakdarpour and Deepak Kumar
                 and Sebastian Fischmeister",
  title =        "Runtime Monitoring of Cyber-Physical Systems Under
                 Timing and Memory Constraints",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "79:1--79:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2744196",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The goal of runtime monitoring is to inspect the
                 well-being of a system by employing a monitor process
                 that reads the state of the system during execution and
                 evaluates a set of properties expressed in some
                 specification language. The main challenge in runtime
                 monitoring is dealing with the costs imposed in terms
                 of resource utilization. In the context of
                 cyber-physical systems, it is crucial for a software
                 monitoring solution to be time predictable to improve
                 scheduling, as well as support composition of
                 monitoring solutions with an overall predictable
                 behavior. Moreover, a small memory footprint is often
                 required in components of cyber-physical systems,
                 especially in deeply embedded systems. In this article,
                 we propose a novel control-theoretic software
                 monitoring solution for coordinating time
                 predictability and memory utilization in runtime
                 monitoring of systems that interact with the physical
                 world. The controllers attempt to reduce monitoring
                 jitter and maximize memory utilization while
                 simultaneously ensuring the soundness of evaluation of
                 properties. For systems where multiple properties are
                 required to be monitored simultaneously, we construct a
                 buffer sharing mechanism in which controllers
                 dynamically share the memory space to negate the effect
                 of bursts of environment actions, thus reducing jitter
                 due to transient high loads. To validate our design
                 choices, we present three case studies: (1) a Bluetooth
                 mobile payment system, which shows a sporadic rate of
                 events during peak hours; (2) a laser beam stabilizer
                 for target tracking, and (3) a monitoring system for
                 air/fuel ratio in a car engine exhaust and the CAM
                 inlet position in the engine's cylinders. The
                 experimental results of the case studies demonstrate up
                 to 40\% improvement in time predictability of the
                 monitoring solution when compared to a basic
                 event-triggered approach. Moreover, memory utilization
                 reaches an average of 90\% when using our dynamic
                 buffer resizing mechanism.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "79",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gebotys:2015:SWP,
  author =       "Catherine H. Gebotys and Brian A. White",
  title =        "A Sliding Window Phase-Only Correlation Method for
                 Side-Channel Alignment in a {Smartphone}",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "80:1--80:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2783441",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Future wireless embedded devices will be increasingly
                 powerful, supporting many more applications including
                 one of the most crucial, security. Although many
                 embedded devices offer resistance to bus probing
                 attacks due to their compact size and high levels of
                 integration, susceptibility to attacks on their
                 electromagnetic side channel must be analyzed. This
                 side channel is often quite complex to analyze due to
                 the complexities of the embedded device including
                 operating system, interrupts, and so forth. This
                 article presents a new methodology for analyzing a
                 complex system's vulnerability to the EM side channel.
                 The methodology proposes a sliding window phase-only
                 correlation method for aligning electromagnetic
                 emanations from a complex smartphone running native
                 code utilizing an on-chip cache. Unlike previous
                 research, experimental results demonstrate that data
                 written to on-chip cache within an advanced 312 MHz
                 0.13 $\mu$m processor executing AES can be attacked
                 utilizing this new methodology. Furthermore, for the
                 first time, it has been shown that the point of
                 side-channel attack is not a spike of increased EM but
                 an area of low EM amplitude, unlike what is noted in
                 previous findings. This research is important for
                 advancing side-channel analysis understanding in
                 complex embedded processors and ensuring secure
                 implementations in future embedded ubiquitous
                 devices.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "80",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% TECS 14(4), article 81: the MC-SRP and MC-SRPT resource-sharing
%%% protocols for EDF-VD mixed-criticality scheduling.
@Article{Zhao:2015:RSP,
  author          = {Qingling Zhao and Zonghua Gu and Haibo Zeng},
  title           = {Resource Synchronization and Preemption Thresholds
                     Within Mixed-Criticality Scheduling},
  journal         = j-TECS,
  volume          = {14},
  number          = {4},
  pages           = {81:1--81:??},
  month           = dec,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2783440},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Dec 8 17:53:22 MST 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In a mixed-criticality system, multiple tasks with
                     different levels of criticality may coexist on the same
                     hardware platform. The scheduling algorithm EDF-VD
                     (Earliest Deadline First with Virtual Deadlines) has
                     been proposed for mixed-criticality systems, which
                     assumes tasks do not share any common resources. We
                     present MC-SRP (Mixed-Criticality Stack Resource
                     Policy), a resource synchronization protocol for
                     EDF-VD, which allows resource sharing among tasks at
                     the same criticality level and guarantees that each
                     task is blocked at most once in each criticality mode.
                     In addition, we present MC-SRPT (MC-SRP with
                     Thresholds) for reducing the application stack size
                     requirement in resource-constrained embedded systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {81},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% TECS 14(4), article 82: storage device emulation by pausing the OS
%%% state.  The abstract holds running text only; the publisher's footnote
%%% callout after "Linux OS" is omitted because no footnote text is
%%% carried in this record.
@Article{Wu:2015:SDE,
  author =       "Ming-Ju Wu and Chun-Jen Tsai",
  title =        "A Storage Device Emulator for System Performance
                 Evaluation",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "82:1--82:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2785969",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The performance and characteristics of the storage
                 devices used in embedded systems can have a great
                 influence on the overall end user experience. When
                 building embedded systems or designing new storage
                 device components, it is important for the designers to
                 be able to evaluate how storage devices of different
                 characteristics will affect the overall system
                 performance. Storage device emulation enables a
                 system's performance to be evaluated with simulated
                 storage devices that are not yet available. In storage
                 device emulation, the emulated storage device appears
                 to the operating system (OS) as a real storage device
                 and its service timings are determined by a disk model,
                 which simulates the behavior of the target storage
                 device. In the conventional storage device emulators,
                 because the OS is running continuously in the real-time
                 domain, the amount of time that the emulators can spend
                 on processing each I/O request is limited by the
                 service time of each corresponding I/O request. This
                 timing constraint can make emulating high-speed storage
                 devices a challenge for the conventional storage device
                 emulators. In this article, we propose an OS state
                 pausing approach to storage device emulation that can
                 overcome the timing constraints faced by the
                 conventional storage device emulators. By pausing the
                 state of the OS while the storage device emulator is
                 busy, the proposed emulator can spend as much time as
                 it needs for processing each I/O request without
                 affecting the performance of the emulated storage
                 device as perceived by the OS. This allows the proposed
                 storage device emulator to emulate storage devices that
                 would otherwise be challenging or even impossible for
                 the conventional storage device emulators. In addition,
                 the main task of storage device emulation is offloaded
                 to an external computer to minimize the impact of the
                 emulation workload on the target machine. The proposed
                 storage device emulator is implemented with the Linux
                 OS on an embedded system development board.
                 Experimental results show that the full-system
                 performance benchmarks measured with the proposed
                 storage device emulator are within 2\% differences
                 compared to the results of the reference system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "82",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% TECS 14(4), article 83: better-than-worst-case design for streaming
%%% applications under process variation.  The acronyms NoC and MPSoCs are
%%% braced in the title so that downcasing styles keep their capitalization.
@Article{Mirzoyan:2015:MNG,
  author =       "Davit Mirzoyan and Benny Akesson and Sander Stuijk and
                 Kees Goossens",
  title =        "Maximizing the Number of Good Dies for Streaming
                 Applications in {NoC}-Based {MPSoCs} Under Process
                 Variation",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "83:1--83:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2785968",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Scaling CMOS technology into nanometer feature-size
                 nodes has made it practically impossible to precisely
                 control the manufacturing process. This results in
                 variation in the speed and power consumption of a
                 circuit. As a solution to process-induced variations,
                 circuits are conventionally implemented with
                 conservative design margins to guarantee the target
                 frequency of each hardware component in manufactured
                 multiprocessor chips. This approach, referred to as
                 worst-case design, results in a considerable circuit
                 upsizing, in turn reducing the number of dies on a
                 wafer. This work deals with the design of real-time
                 systems for streaming applications (e.g., video
                 decoders) constrained by a throughput requirement
                 (e.g., frames per second) with reduced design margins,
                 referred to as better-than-worst-case design. To this
                 end, the first contribution of this work is a complete
                 modeling framework that captures a streaming
                 application mapped to an NoC-based multiprocessor
                 system with voltage-frequency islands under
                 process-induced die-to-die and within-die frequency
                 variations. The framework is used to analyze the impact
                 of variations in the frequency of hardware components
                 on application throughput at the system level. The
                 second contribution of this work is a methodology to
                 use the proposed framework and estimate the impact of
                 reducing circuit design margins on the number of good
                 dies that satisfy the throughput requirement of a
                 real-time streaming application. We show on both
                 synthetic and real applications that the proposed
                 better-than-worst-case design approach can increase the
                 number of good dies by up to 9.6\% and 18.8\% for
                 designs with and without fixed SRAM and IO blocks,
                 respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "83",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% TECS 14(4), article 84: error-controlled data reduction across sensor
%%% segments in wireless sensor networks.
@Article{Zhang:2015:CDR,
  author =       "Shiwen Zhang and Qingquan Zhang and Sheng Xiao and
                 Ting Zhu and Yu Gu and Yaping Lin",
  title =        "Cooperative Data Reduction in Wireless Sensor
                 Network",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "84:1--84:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2786755",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In wireless sensor networks, owing to the limited
                 energy of the sensor node, it is very meaningful to
                 propose a dynamic scheduling scheme with data
                 management that reduces energy as soon as possible.
                 However, traditional techniques treat data management
                 as an isolated process on only selected individual
                 nodes. In this article, we propose an aggressive data
                 reduction architecture, which is based on error control
                 within sensor segments and integrates three parallel
                 dynamic control mechanisms. We demonstrate that this
                 architecture not only achieves energy savings but also
                 guarantees the data accuracy specified by the
                 application. Furthermore, based on this architecture,
                 we propose two implementations. The experimental
                 results show that both implementations can raise the
                 energy savings while keeping the error at a predefined
                 and acceptable level. We observed that, compared with
                 the basic implementation, the enhancement
                 implementation achieves a relatively higher data
                 accuracy. Moreover, the enhancement implementation is
                 more suitable for the harsh environmental monitoring
                 applications. Further, when both implementations
                 achieve the same accuracy, the enhancement
                 implementation saves more energy. Extensive experiments
                 on realistic historical soil temperature data confirm
                 the efficacy and efficiency of two implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "84",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% TECS 14(4), article 85: an anonymous E-Cash scheme split between an
%%% untrusted smartphone and a trusted secure element.  The dash in the
%%% title is an em dash (written ---) separating title and subtitle; a
%%% single hyphen would fuse "E-Cash" and "Toward" into one compound.
@Article{Scheir:2015:ASC,
  author =       "Marijn Scheir and Josep Balasch and Alfredo Rial and
                 Bart Preneel and Ingrid Verbauwhede",
  title =        "Anonymous Split {E}-Cash---Toward Mobile Anonymous
                 Payments",
  journal =      j-TECS,
  volume =       "14",
  number =       "4",
  pages =        "85:1--85:??",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2783439",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Dec 8 17:53:22 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Anonymous E-Cash was first introduced in 1982 as a
                 digital, privacy-preserving alternative to physical
                 cash. A lot of research has since then been devoted to
                 extend and improve its properties, leading to the
                 appearance of multiple schemes. Despite this progress,
                 the practical feasibility of E-Cash systems is still
                 today an open question. Payment tokens are typically
                 portable hardware devices in smart card form, resource
                 constrained due to their size, and therefore not suited
                 to support largely complex protocols such as E-Cash.
                 Migrating to more powerful mobile platforms, for
                 instance, smartphones, seems a natural alternative.
                 However, this implies moving computations from trusted
                 and dedicated execution environments to generic
                 multiapplication platforms, which may result in
                 security vulnerabilities. In this work, we propose a
                 new anonymous E-Cash system to overcome this
                 limitation. Motivated by existing payment schemes based
                 on MTM (Mobile Trusted Module) architectures, we
                 consider at design time a model in which user payment
                 tokens are composed of two modules: an untrusted but
                 powerful execution platform (e.g., smartphone) and a
                 trusted but constrained platform (e.g., secure
                 element). We show how the protocol's computational
                 complexity can be relaxed by a secure split of
                 computations: nonsensitive operations are delegated to
                 the powerful platform, while sensitive computations are
                 kept in a secure environment. We provide a full
                 construction of our proposed Anonymous Split E-Cash
                 scheme and show that it fully complies with the main
                 properties of an ideal E-Cash system. Finally, we test
                 its performance by implementing it on an Android
                 smartphone equipped with a Java-Card-compatible secure
                 element.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "85",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% TECS 14(4), article 86: event-based scheduling policies and their
%%% decomposition into per-subsystem atomic policies.
@Article{Jiang:2015:AEB,
  author          = {Jian-Min Jiang and Huibiao Zhu and Qin Li and Yongxin
                     Zhao and Lin Zhao and Shi Zhang and Ping Gong and Zhong
                     Hong},
  title           = {Analyzing Event-Based Scheduling in Concurrent
                     Reactive Systems},
  journal         = j-TECS,
  volume          = {14},
  number          = {4},
  pages           = {86:1--86:??},
  month           = dec,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2783438},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Dec 8 17:53:22 MST 2015},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The traditional research on scheduling focuses on task
                     scheduling and schedulability analysis in concurrent
                     reactive systems. In this article, we dedicate
                     ourselves to event-based scheduling. We first formally
                     define an event-based scheduling policy and propose the
                     notion of the correctness of a scheduling policy in
                     terms of weak termination. Then we investigate the
                     correctness of the decomposition of scheduling controls
                     and finally obtain a decentralized scheduling method.
                     The method can automatically decompose the scheduling
                     policies of a concurrent reactive system into atomic
                     scheduling policies. Every atomic scheduling policy
                     corresponds to one subsystem. Each of the subsystems is
                     a completely independent system, which may be developed
                     and deployed independently. An experiment demonstrates
                     these results that may help engineers to design correct
                     and efficient schedule policies for a concurrent
                     reactive system.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {86},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% TECS 15(1), article 1: the Warbler family of NLFSR-based lightweight
%%% pseudorandom number generators; also listed in the cryptography2010
%%% and prng bibliographies (see bibsource).
@Article{Mandal:2016:DIW,
  author          = {Kalikinkar Mandal and Xinxin Fan and Guang Gong},
  title           = {Design and Implementation of {Warbler} Family of
                     Lightweight Pseudorandom Number Generators for Smart
                     Devices},
  journal         = j-TECS,
  volume          = {15},
  number          = {1},
  pages           = {1:1--1:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2808230},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/prng.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With the advent of ubiquitous computing and the
                     Internet of Things (IoT), the security and privacy
                     issues for various smart devices such as
                     radio-frequency identification (RFID) tags and wireless
                     sensor nodes are receiving increased attention from
                     academia and industry. A number of lightweight
                     cryptographic primitives have been proposed to provide
                     security services for resource-constrained smart
                     devices. As one of the core primitives, a
                     cryptographically secure pseudorandom number generator
                     (PRNG) plays an important role for lightweight embedded
                     applications. The most existing PRNGs proposed for
                     smart devices employ true random number generators as a
                     component, which generally incur significant power
                     consumption and gate count in hardware. In this
                     article, we present Warbler family, a new pseudorandom
                     number generator family based on nonlinear feedback
                     shift registers (NLFSRs) with desirable randomness
                     properties. The design of the Warbler family is based
                     on the combination of modified de Bruijn blocks
                     together with a nonlinear feedback Welch-Gong (WG)
                     sequence generator, which enables us to precisely
                     characterize the randomness properties and to flexibly
                     adjust the security level of the resulting PRNG. Some
                     criteria for selecting parameters of the Warbler family
                     are proposed to offer the maximum level of security.
                     Two instances of the Warbler family are also described,
                     which feature two different security levels and are
                     dedicated to EPC C1 Gen2 RFID tags and wireless sensor
                     nodes, respectively. The security analysis shows that
                     the proposed instances not only can pass the
                     cryptographic statistical tests recommended by the EPC
                     C1 Gen2 standard and NIST but also are resistant to the
                     cryptanalytic attacks such as algebraic attacks, cube
                     attacks, time-memory-data tradeoff attacks,
                     Mihaljevi{\'c} et al.'s attacks, and weak internal
                     state and fault injection attacks. Our ASIC
                     implementations using a 65nm CMOS process demonstrate
                     that the proposed two lightweight instances of the
                     Warbler family can achieve good performance in terms of
                     speed and area and provide ideal solutions for securing
                     low-cost smart devices.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {1},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% TECS 15(1), article 2: a CDMA-based technique that eliminates
%%% broadcast cache invalidations in photonic networks on chip.
@Article{Poddar:2016:DHP,
  author          = {Soumyajit Poddar and Prasun Ghosal and Hafizur
                     Rahaman},
  title           = {Design of a High-Performance {CDMA}-Based
                     Broadcast-Free Photonic Multi-Core Network on Chip},
  journal         = j-TECS,
  volume          = {15},
  number          = {1},
  pages           = {2:1--2:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2839301},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Present-day focus on multicore research has not only
                     increased computing power but also power- and
                     bandwidth-efficient communication among cores. On-chip
                     communication networks have become popular today
                     because of their low energy use and modular structure
                     compared to bus-based interconnects. Silicon photonics
                     has further boosted the performance of on-chip
                     interconnection networks with its low energy-delay
                     product and high reliability. In current multicore
                     Network-on-Chip (NoC) architectures, photonics is
                     playing an important role in transferring large volumes
                     of data both on- and off-chip. The problem addressed in
                     this work is the issue of broadcast traffic arising due
                     to invalidation requests from on-chip cache memories.
                     Although such traffic is typically less than 1\% of
                     total traffic, it can easily present a high load on
                     network resources, creating congestion and degrading
                     performance. In this article, we propose a CDMA-based,
                     secure, scalable, and energy-efficient technique to
                     eliminate broadcast invalidations and increase overall
                     performance. Experimental results indicate a
                     performance boost up to 22.2\% over a competing
                     Photonic NoC and up to 57.4\% over Electrical
                     Mesh-based NoC when the proposed technique is used.
                     Additional hardware deployed has an area overhead of
                     less than 1\%, whereas total energy consumed is at par
                     with other state-of-the-art techniques.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {2},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% TECS 15(1), article 3: a reliability model combining
%%% radiation-sensitive area (spatial) with performance (temporal).
@Article{Santini:2016:BCS,
  author          = {Thiago Santini and Paolo Rech and Gabriel Luca Nazar
                     and Fl{\'a}vio Rech Wagner},
  title           = {Beyond Cross-Section: Spatio-Temporal Reliability
                     Analysis},
  journal         = j-TECS,
  volume          = {15},
  number          = {1},
  pages           = {3:1--3:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2794148},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {A computational system employed in safety-critical
                     applications typically has reliability as a primary
                     concern. Thus, the designer focuses on minimizing the
                     device radiation-sensitive area, often leading to
                     performance degradation. In this article, we present a
                     mathematical model to evaluate system reliability in
                     spatial (i.e., radiation-sensitive area) and temporal
                     (i.e., performance) terms and prove that minimizing
                     radiation-sensitive area does not necessarily maximize
                     application reliability. To support our claim, we
                     present an empirical counterexample where application
                     reliability is improved even if the radiation-sensitive
                     area of the device is increased. An extensive radiation
                     test campaign using a 28 nm commercial-off-the-shelf
                     ARM-based SoC was conducted, and experimental results
                     demonstrate that, while executing the considered
                     application at military aircraft altitude, the
                     probability of executing a two-year mission workload
                     without failures is increased by 5.85\% if L1 caches
                     are enabled (thus increasing the radiation-sensitive
                     area) when compared to no cache level being enabled.
                     However, if both L1 and L2 caches are enabled, the
                     probability is decreased by 31.59\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {3},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% TECS 15(1), article 4: preaveraging and carry-propagate techniques
%%% for side-channel analysis of HMAC-SHA256; also listed in the
%%% cryptography2010 bibliography (see bibsource).
@Article{Gebotys:2016:PCP,
  author          = {Catherine H. Gebotys and Brian A. White and Edgar
                     Mateos},
  title           = {Preaveraging and Carry Propagate Approaches to
                     Side-Channel Analysis of {HMAC-SHA256}},
  journal         = j-TECS,
  volume          = {15},
  number          = {1},
  pages           = {4:1--4:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2794093},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Although HMAC-SHA has been standardized for over a
                     decade, few published attacks on the single-cycle round
                     implementation exist. In this research, new attack
                     techniques are provided, for the first time, (1) to
                     help to discriminate between values of secret
                     intermediate variables within HMAC and (2) to reduce
                     the large word size complexity. Preaveraging and carry
                     propagate techniques are proposed using chosen
                     plaintexts and shown to significantly reduce the
                     complexity and runtimes for side-channel analysis of an
                     Altera FPGA platform. This research is important for
                     advancing side channel analysis of complex embedded
                     ASICs and ensuring secure implementations in future
                     embedded ubiquitous devices.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {4},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% TECS 15(1), article 5: Dylog, a dynamic logging facility for
%%% networked embedded systems.  The system name is braced in the title
%%% so that downcasing styles do not render it as "dylog"; in the
%%% abstract the unit is set as one token (math-mode mu directly
%%% followed by "s").
@Article{Dong:2016:DLD,
  author =       "Wei Dong and Luyao Luo and Chao Huang",
  title =        "Dynamic Logging with {Dylog} in Networked Embedded
                 Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2807698",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Event logging is an important technique for networked
                 embedded systems like wireless sensor networks. It can
                 greatly help developers to understand complex system
                 behaviors and diagnose program bugs. Existing logging
                 facilities do not well satisfy three practical
                 requirements: flexibility, efficiency, and high
                 synchronization accuracy. To simultaneously satisfy
                 these requirements, we present Dylog, a dynamic logging
                 facility for networked embedded systems. Dylog employs
                 several techniques. First, Dylog uses binary
                 instrumentation for dynamically inserting or removing
                 logging statements, enabling flexible and interactive
                 debugging at runtime. Second, Dylog incorporates an
                 efficient storage system and log collection protocol
                 for recording and transferring the logging messages.
                 Third, Dylog employs a lightweight data-driven approach
                 for reconstructing the synchronized time of the logging
                 messages. Dylog uses MAC-layer timestamping and drift
                 compensation to achieve high synchronization accuracy.
                 We implement Dylog on the TinyOS 2.1.1/TelosB platform.
                 Results show the following: (1) Dylog incurs a small
                 overhead. Indirections in Dylog incur an additional
                 execution overhead of less than 1\%. Dylog reduces the
                 logging storage size by approximately 50\% compared
                 with the standard TinyOS radio printf library. Dylog
                 reduces the patch size by more than 90\%, compared with
                 incremental reprogramming. (2) Dylog reduces the
                 synchronization overhead by 78\% in terms of
                 transmission cost, compared with a traditional time
                 synchronization protocol, FTSP, and it can achieve a
                 high time synchronization accuracy of 5.4 $ \mu $s.
                 (3) Dylog can help diagnose system problems effectively
                 at the source-code level for three real-world
                 scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jiang:2016:PAD,
  author =       "Ke Jiang and Petru Eles and Zebo Peng",
  title =        "Power-Aware Design Techniques of Secure Multimode
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2801152",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Nowadays, embedded systems have been widely used in
                 all types of application areas, some of which belong to
                 the safety and reliability critical domains. The
                 functional correctness and design robustness of the
                 embedded systems involved in such domains are crucial
                 for the safety of personal/enterprise property or even
                 human lives. Thereby, a holistic design procedure that
                 considers all the important design concerns is
                 essential. In this article, we approach embedded
                 systems design from an integral perspective. We
                 consider not only the classic real-time and quality of
                 service requirements, but also the emerging security
                 and power efficiency demands. Modern embedded systems
                 are not any more developed for a fixed purpose, but
                 instead designed for undertaking various processing
                 requests. This leads to the concept of multimode
                 embedded systems, in which the number and nature of
                 active tasks change during runtime. Under dynamic
                 situations, providing high performance along with
                 various design concerns becomes a really difficult
                 problem. Therefore, we propose a novel power-aware
                 secure embedded systems design framework that
                 efficiently solves the problem of runtime quality
                 optimization with security and power constraints. The
                 efficiency of our proposed techniques is evaluated in
                 extensive experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bambagini:2016:EAS,
  author =       "Mario Bambagini and Mauro Marinoni and Hakan Aydin and
                 Giorgio Buttazzo",
  title =        "Energy-Aware Scheduling for Real-Time Systems: a
                 Survey",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2808231",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a survey of energy-aware
                 scheduling algorithms proposed for real-time systems.
                 The analysis presents the main results starting from
                 the middle 1990s until today, showing how the proposed
                 solutions evolved to address the evolution of the
                 platform's features and needs. The survey first
                 presents a taxonomy to classify the existing approaches
                 for uniprocessor systems, distinguishing them according
                 to the technology exploited for reducing energy
                 consumption, that is, Dynamic Voltage and Frequency
                 Scaling (DVFS), Dynamic Power Management (DPM), or
                 both. Then, the survey discusses the approaches
                 proposed in the literature to deal with the additional
                 problems related to the evolution of computing
                 platforms toward multicore architectures.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Thomas:2016:EDP,
  author =       "Anna Thomas and Karthik Pattabiraman",
  title =        "Error Detector Placement for Soft Computing
                 Applications",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2801154",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The scaling of Silicon devices has exacerbated the
                 unreliability of modern computer systems, and power
                 constraints have necessitated the involvement of
                 software in hardware error detection. At the same time,
                 emerging workloads in the form of soft computing
                 applications (e.g., multimedia applications) can
                 tolerate most hardware errors as long as the erroneous
                 outputs do not deviate significantly from error-free
                 outcomes. We term outcomes that deviate significantly
                 from the error-free outcomes as Egregious Data
                 Corruptions (EDCs). In this study, we propose a
                 technique to place detectors for selectively detecting
                 EDC-causing errors in an application. We performed an
                 initial study to formulate heuristics that identify
                 EDC-causing data. Based on these heuristics, we
                 developed an algorithm that identifies program
                 locations for placing high coverage detectors for EDCs
                 using static analysis. Our technique achieves an
                 average EDC coverage of 82\%, under performance
                 overheads of 10\%, while detecting 10\% of the Non-EDC
                 and benign faults. We also evaluate the error
                 resilience of these applications under the 14 compiler
                 optimizations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Corre:2016:TTB,
  author =       "Youenn Corre and Jean-Philippe Diguet and Dominique
                 Heller and Dominique Blouin and Lo{\"\i}c Lagadec",
  title =        "{TBES}: Template-Based Exploration and Synthesis of
                 Heterogeneous Multiprocessor Architectures on {FPGA}",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2816817",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article describes TBES, a software end-to-end
                 environment for synthesizing multitask applications on
                 FPGAs. The implementation follows a template-based
                 approach for creating heterogeneous multiprocessor
                 architectures. Heterogeneity stems from the use of
                 general-purpose processors along with custom
                 accelerators. Experimental results demonstrate
                 substantial speedup for several classes of
                 applications. Furthermore, this work allows for
                 reducing development costs and saving development time
                 for the software architect, the domain expert, and the
                 optimization expert. This work provides a framework to
                 bring together various existing tools and optimisation
                 algorithms. The advantages are manifold: modularity and
                 flexibility, easy customization for best-fit algorithm
                 selection, durability and evolution over time, and
                 legacy preservation including domain experts' know-how.
                 In addition to the use of architecture templates for
                 the overall system, a second contribution lies in using
                 high-level synthesis for promoting exploration of
                 hardware IPs. The domain expert, who best knows which
                 tasks are good candidates for hardware implementation,
                 selects parts of the initial application to be
                 potentially synthesized as dedicated accelerators. As a
                 consequence, the HLS general problem turns into a
                 constrained and more tractable issue, and automation
                 capabilities eliminate the need for tedious and
                 error-prone manual processes during domain space
                 exploration. The automation only takes place once the
                 application has been broken down into concurrent tasks
                 by the designer, who can then drive the synthesis
                 process with a set of parameters provided by TBES to
                 balance tradeoffs between optimization efforts and
                 quality of results. The approach is demonstrated step
                 by step up to FPGA implementations and executions with
                 an MJPEG benchmark and a complex Viola-Jones face
                 detection application. We show that TBES allows one to
                 achieve results with up to 10 times speedup to reduce
                 development times and to widen design space
                 exploration.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chatterjee:2016:TAD,
  author =       "Urbi Chatterjee and Rajat Subhra Chakraborty and
                 Hitesh Kapoor and Debdeep Mukhopadhyay",
  title =        "Theory and Application of Delay Constraints in Arbiter
                 {PUF}",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2815621",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Physically Unclonable Function (PUF) circuits are
                 often vulnerable to mathematical model-building
                 attacks. We theoretically quantify the advantage
                 provided to an adversary by any training dataset
                 expansion technique along the lines of security
                 analysis of cryptographic hash functions. We present an
                 algorithm to enumerate certain sets of delay
                 constraints for the widely studied Arbiter PUF (APUF)
                 circuit, then demonstrate how these delay constraints
                 can be utilized to expand the set of known
                 Challenge--Response Pairs (CRPs), thus facilitating
                 model-building attacks. We provide experimental results
                 for Field Programmable Gate Array (FPGA)--based APUF to
                 establish the effectiveness of the proposed attack.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kwon:2016:CBF,
  author =       "Se Jin Kwon",
  title =        "A Cache-Based Flash Translation Layer for {TLC}-Based
                 Multimedia Storage Devices",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2820614",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Current triple-level cell (TLC)-based solid-state
                 drives used in multimedia storage devices support
                 multichannel access to increase capacity and
                 throughput. Unfortunately, current state-of-the-art FTL
                 algorithms must employ selective caching for inquiring
                 about the address mapping information, which causes low
                 space utilization, a large flash memory requirement,
                 and performance degradation. In this article, the
                 Cache-based Flash Translation Layer (Cab-FTL) is
                 proposed for
                 TLC-based multimedia storage devices. Cab-FTL enhances
                 the read and write performances by achieving high space
                 utilization while reducing the size of the mapping
                 tables to 1.68\% compared to DFTL. Despite a caching of
                 the mapping tables in DRAM, Cab-FTL achieves a fast
                 system boot using its fast wake-up mechanism.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2016:EPC,
  author =       "Sheng-Min Huang and Li-Pin Chang",
  title =        "Exploiting Page Correlations for Write Buffering in
                 Page-Mapping Multichannel {SSDs}",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "12:1--12:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2815622",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Advanced solid-state disks (SSDs) have been equipped
                 with page-mapping flash translation layers and
                 multichannel architectures. The SSDs employ a RAM-based
                 write buffer, which delays write requests for reducing
                 write traffic, reorders requests for mitigating
                 garbage-collection overhead, and produces parallel page
                 writes for improving channel time utilization. This
                 work presents a novel write buffer algorithm that
                 exploits temporal and spatial correlations among buffer
                 pages. The write-buffer groups temporally or spatially
                 correlated buffer pages and then writes the grouped
                 buffer pages to the same flash block. In this way, when
                 the correlated page data are updated in the future,
                 flash blocks will receive bulk page invalidations and
                 become good candidates for garbage collection. With
                 multichannel architectures, the write buffer adaptively
                 disperses read-most sequential data over channels for
                 high page-level parallelism of sequential reads, while
                 clustering write-most sequential data in the same
                 channel for a reduced cost of garbage collection. We
                 evaluated the proposed method and previously proposed
                 buffer algorithms. Our method was shown to outperform
                 the existing methods by up to 134\%. We also
                 implemented our buffer design on the OpenSSD platform;
                 the time and space overheads of our design were
                 reported to be very low.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2016:SGA,
  author =       "Li-Pin Chang and Yu-Syun Liu and Wen-Huei Lin",
  title =        "Stable Greedy: Adaptive Garbage Collection for Durable
                 Page-Mapping Multichannel {SSDs}",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "13:1--13:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2820613",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Commodity solid state drives (SSDs) have recently
                 begun involving the adoption of powerful controllers
                 for multichannel flash management at the page level.
                 However, many of these models still use primitive
                 garbage-collection algorithms, because previous
                 approaches are subject to poor scalability with
                 high-capacity flash memory. This study presents Stable
                 Greedy for garbage collection in page-mapping
                 multichannel SSDs. Stable Greedy identifies
                 page-accurate data hotness using block-level
                 information, and jointly considers block space
                 utilization and block stability for victim selection.
                 Its design considers flash wear leveling for SSD
                 lifetime enhancement at the block level as well as at
                 the channel level. Stable Greedy runs at a constant
                 time, and requires limited RAM space. The simulation
                 results revealed that Stable Greedy outperformed
                 previous methods considerably under various workloads
                 and multichannel architectures. Stable Greedy was
                 successfully implemented on the OpenSSD platform, and
                 the actual performance measurements were consistent
                 with the simulation results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sun:2016:FFJ,
  author =       "Jinghao Sun and Nan Guan and Yang Wang and Qingxu Deng
                 and Peng Zeng and Wang Yi",
  title =        "Feasibility of Fork-Join Real-Time Task Graph Models:
                 Hardness and Algorithms",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "14:1--14:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2809780",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In the formal analysis of real-time systems, modeling
                 of branching codes and modeling of intratask
                 parallelism structures are two of the most important
                 research topics. These two real-time properties are
                 combined, resulting in the fork-join real-time task
                 (FJRT) model, which extends the digraph-based task
                 model with forking and joining semantics. We prove that
                 the EDF schedulability problem on a preemptive
                 uniprocessor for the FJRT model is coNP-hard in the
                 strong sense, even if the utilization of the task
                 system is bounded by a constant strictly less than 1.
                 Then, we show that the problem becomes tractable with
                 some slight structural restrictions on parallel
                 sections, for which we propose an exact schedulability
                 test with pseudo-polynomial time complexity. Our
                 results thus establish a borderline between the
                 tractable and intractable FJRT models.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{DiPietro:2016:CLD,
  author =       "Roberto {Di Pietro} and Flavio Lombardi and Antonio
                 Villani",
  title =        "{CUDA} Leaks: a Detailed Hack for {CUDA} and a
                 (Partial) Fix",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "15:1--15:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2801153",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Graphics processing units (GPUs) are increasingly
                 common on desktops, servers, and embedded platforms. In
                 this article, we report on new security issues related
                 to CUDA, which is the most widespread platform for GPU
                 computing. In particular, details and proofs-of-concept
                 are provided about novel vulnerabilities to which CUDA
                 architectures are subject. We show how such
                 vulnerabilities can be exploited to cause severe
                 information leakage. As a case study, we experimentally
                 show how to exploit one of these vulnerabilities on a
                 GPU implementation of the AES encryption algorithm.
                 Finally, we also suggest software patches and
                 alternative approaches to tackle the presented
                 vulnerabilities.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhu:2016:SDW,
  author =       "Zhenhuan Zhu and S. Olutunde Oyadiji",
  title =        "Structure Design of Wireless Sensor Nodes with Energy
                 and Cost Awareness for Multichannel Signal
                 Measurement",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "16:1--16:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2790300",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article aims to develop a design pattern of a
                 wireless sensor node working in multichannel signal
                 measurement for effectively lowering energy consumption
                 and cost. The proposed design pattern enables the
                 architecture of a wireless sensor node to adapt to
                 application requirements, thus to significantly reduce
                 system redundancy. Two multisensor structures are
                 parameterized regarding frequency response, power
                 consumption, and cost. The system design pattern
                 provides flexibility through three proposed interface
                 circuits that bridge between multisensor structures and
                 the microprocessors inside sensor nodes. It also allows
                 adjusting the time delay parameter that can enlarge the
                 selection range of main electronic components, and
                 thereby increases the robustness of the model for
                 practical implementations. A virtual case study is
                 provided to demonstrate how to apply this model into an
                 application design.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hsiu:2016:UCS,
  author =       "Pi-Cheng Hsiu and Po-Hsien Tseng and Wei-Ming Chen and
                 Chin-Chiang Pan and Tei-Wei Kuo",
  title =        "User-Centric Scheduling and Governing on Mobile
                 Devices with {big.LITTLE} Processors",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "17:1--17:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2829946",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Mobile applications will become progressively more
                 complicated and diverse. Heterogeneous computing
                 architectures like big.LITTLE are a hardware solution
                 that allows mobile devices to combine computing
                 performance and energy efficiency. However, software
                 solutions that conform to the paradigm of conventional
                 fair scheduling and governing are not applicable to
                 mobile systems, thereby degrading user experience or
                 reducing energy efficiency. In this article, we exploit
                 the concept of application sensitivity, which reflects
                 the user's attention on each application, and devise a
                 user-centric scheduler and governor that allocate
                 computing resources to applications according to their
                 sensitivity. Furthermore, we integrate our design into
                 the Android operating system. The results of
                 experiments conducted on a commercial big.LITTLE
                 smartphone with real-world mobile apps demonstrate that
                 the proposed design can achieve significant gains in
                 energy efficiency while improving the quality of user
                 experience.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sharma:2016:DFT,
  author =       "Namita Sharma and Preeti Ranjan Panda and Francky
                 Catthoor and Min Li and Prashant Agrawal",
  title =        "Data Flow Transformation for Energy-Efficient
                 Implementation of {Givens} Rotation-Based {QRD}",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "18:1--18:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2837025",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "QR decomposition (QRD), a matrix decomposition
                 algorithm widely used in embedded application domain,
                 can be realized in a large number of valid processing
                 sequences that differ significantly in the number of
                 memory accesses and computations, and hence the overall
                 implementation energy. With modern low-power embedded
                 processors evolving toward register files with wide
                 memory interfaces and vector functional units (FUs),
                 data flow in these algorithms needs to be carefully
                 devised to efficiently utilize the costly wide memory
                 accesses and the vector FUs. In this article, we
                 present an energy-efficient data flow transformation
                 strategy for the Givens rotation-based QRD.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Emeretlis:2016:LBB,
  author =       "Andreas Emeretlis and George Theodoridis and
                 Panayiotis Alefragis and Nikolaos Voros",
  title =        "A {Logic-Based Benders} Decomposition Approach for
                 Mapping Applications on Heterogeneous Multicore
                 Platforms",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "19:1--19:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2838733",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The development of efficient methods for mapping
                 applications on heterogeneous multicore platforms is a
                 key issue in the field of embedded systems. In this
                 article, a novel approach based on the Logic-Based
                 Benders decomposition principle is introduced for
                 mapping complex applications on these platforms, aiming
                 at optimizing their execution time. To provide optimal
                 solutions for this problem in a short time, a new
                 hybrid model that combines Integer Linear Programming
                 (ILP) and Constraint Programming (CP) models is
                 introduced. Also, to reduce the complexity of the model
                 and its solution time, a set of novel techniques for
                 generating additional constraints called Benders cuts
                 is proposed. An extensive set of experiments has been
                 performed in which synthetic applications described by
                 Directed Acyclic Graphs (DAGs) were mapped to a number
                 of heterogeneous multicore platforms. Moreover,
                 experiments with DAGs that correspond to two real-life
                 applications have also been performed. Based on the
                 experimental results, it is proven that the proposed
                 approach outperforms the pure ILP model in terms of the
                 solution time and quality of the solution.
                 Specifically, the proposed approach is able to find an
                 optimal solution within a time limit of 2 hours in the
                 vast majority of performed experiments, while the pure
                 ILP model fails. Also, for the cases where both methods
                 fail to find an optimal solution within the time limit,
                 the solution of the proposed approach is systematically
                 better than the solution of the ILP model.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ko:2016:SBS,
  author =       "Yohan Ko and Jihoon Kang and Jongwon Lee and Yongjoo
                 Kim and Joonhyun Kim and Hwisoo So and Kyoungwoo Lee
                 and Yunheung Paek",
  title =        "Software-Based Selective Validation Techniques for
                 Robust {CGRAs} Against Soft Errors",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "20:1--20:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2843943",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Coarse-Grained Reconfigurable Architectures (CGRAs)
                 are drawing significant attention since they promise
                 both performances with parallelism and flexibility with
                 reconfiguration. Soft errors (or transient faults) are
                 becoming a serious design concern in embedded systems
                 including CGRAs since the soft error rate is increasing
                 exponentially as technology is scaling. A recently
                 proposed software-based technique with TMR (Triple
                 Modular Redundancy) implemented on CGRAs incurs extreme
                 overheads in terms of runtime and energy consumption
                 mainly due to expensive voting mechanisms for the
                 outputs from the triplication of every operation. In
                 this article, we propose selective validation
                 mechanisms for efficient modular redundancy techniques
                 in the datapaths on CGRAs. Our techniques selectively
                 validate the results at synchronous operations rather
                 than every operation in order to reduce the expensive
                 performance overhead from the validation mechanism. We
                 also present an optimization technique to further
                 improve the runtime and the energy consumption by
                 minimizing synchronous operations where a validating
                 mechanism needs to be applied. Our experimental results
                 demonstrate that our selective validation-based TMR
                 technique with our optimization on CGRAs can improve
                 the runtime by 41.0\% and the energy consumption by
                 26.2\% on average over benchmarks as compared to the
                 recently proposed software-based TMR technique with the
                 full validation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ivanov:2016:ARS,
  author =       "Radoslav Ivanov and Miroslav Pajic and Insup Lee",
  title =        "Attack-Resilient Sensor Fusion for Safety-Critical
                 Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "21:1--21:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2847418",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article focuses on the design of safe and
                 attack-resilient Cyber-Physical Systems (CPS) equipped
                 with multiple sensors measuring the same physical
                 variable. A malicious attacker may be able to disrupt
                 system performance through compromising a subset of
                 these sensors. Consequently, we develop a precise and
                 resilient sensor fusion algorithm that combines the
                 data received from all sensors by taking into account
                 their specified precisions. In particular, we note that
                 in the presence of a shared bus, in which messages are
                 broadcast to all nodes in the network, the attacker's
                 impact depends on what sensors he has seen before
                 sending the corrupted measurements. Therefore, we
                 explore the effects of communication schedules on the
                 performance of sensor fusion and provide theoretical
                 and experimental results advocating for the use of the
                 Ascending schedule, which orders sensor transmissions
                 according to their precision starting from the most
                 precise. In addition, to improve the accuracy of the
                 sensor fusion algorithm, we consider the dynamics of
                 the system in order to incorporate past measurements at
                 the current time. Possible ways of mapping sensor
                 measurement history are investigated in the article and
                 are compared in terms of the confidence in the final
                 output of the sensor fusion. We show that the precision
                 of the algorithm using history is never worse than the
                 no-history one, while the benefits may be significant.
                 Furthermore, we utilize the complementary properties of
                 the two methods and show that their combination results
                 in a more precise and resilient algorithm. Finally, we
                 validate our approach in simulation and experiments on
                 a real unmanned ground robot.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2016:ESB,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Science of the Big and Small and Embedded
                 Computing Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "21:1--21:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2901293",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dutt:2016:TSE,
  author =       "Nikil Dutt and Axel Jantsch and Santanu Sarma",
  title =        "Toward Smart Embedded Systems: a Self-aware
                 System-on-Chip {(SoC)} Perspective",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "22:1--22:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2872936",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems must address a multitude of
                 potentially conflicting design constraints such as
                 resiliency, energy, heat, cost, performance, security,
                 etc., all in the face of highly dynamic operational
                 behaviors and environmental conditions. By
                 incorporating elements of intelligence, the hope is
                 that the resulting ``smart'' embedded systems will
                 function correctly and within desired constraints in
                 spite of highly dynamic changes in the applications and
                 the environment, as well as in the underlying
                 software/hardware platforms. Since terms related to
                 ``smartness'' (e.g., self-awareness, self-adaptivity,
                 and autonomy) have been used loosely in many software
                 and hardware computing contexts, we first present a
                 taxonomy of ``self-x'' terms and use this taxonomy to
                 relate major ``smart'' software and hardware computing
                 efforts. A major attribute for smart embedded systems
                 is the notion of self-awareness that enables an
                 embedded system to monitor its own state and behavior,
                 as well as the external environment, so as to adapt
                 intelligently. Toward this end, we use a System-on-Chip
                 perspective to show how the CyberPhysical
                 System-on-Chip (CPSoC) exemplar platform achieves
                 self-awareness through a combination of cross-layer
                 sensing, actuation, self-aware adaptations, and online
                 learning. We conclude with some thoughts on open
                 challenges and research directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vinco:2016:ESI,
  author =       "Sara Vinco and Christian Pilato",
  title =        "Editorial: Special Issue on Innovative Design Methods
                 for Smart Embedded Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "22:1--22:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2885505",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Oneto:2016:LHF,
  author =       "Luca Oneto and Sandro Ridella and Davide Anguita",
  title =        "Learning Hardware-Friendly Classifiers Through
                 Algorithmic Stability",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "23:1--23:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2836165",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most state-of-the-art machine-learning (ML) algorithms
                 do not consider the computational constraints of
                 implementing the learned model on embedded devices.
                 These constraints are, for example, the limited depth
                 of the arithmetic unit, the memory availability, or the
                 battery capacity. We propose a new learning framework,
                 the Algorithmic Risk Minimization (ARM), which relies
                 on Algorithmic-Stability, and includes these
                 constraints inside the learning process itself. ARM
                 allows one to train advanced resource-sparing ML models
                 and to efficiently deploy them on smart embedded
                 systems. Finally, we show the advantages of our
                 proposal on a smartphone-based Human Activity
                 Recognition application by comparing it to a
                 conventional ML approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Das:2016:AHR,
  author =       "Anup Das and Bashir M. Al-Hashimi and Geoff V.
                 Merrett",
  title =        "Adaptive and Hierarchical Runtime Manager for
                 Energy-Aware Thermal Management of Embedded Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "24:1--24:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2834120",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modern embedded systems execute applications, which
                 interact with the operating system and hardware
                 differently depending on the type of workload. These
                 cross-layer interactions result in wide variations of
                 the chip-wide thermal profile. In this article, a
                 reinforcement learning-based runtime manager is
                 proposed that guarantees application-specific
                 performance requirements and controls the POSIX thread
                 allocation and voltage/frequency scaling for
                 energy-efficient thermal management. This controls
                 three thermal aspects: peak temperature, average
                 temperature, and thermal cycling. Contrary to existing
                 learning-based runtime approaches that optimize energy
                 and temperature individually, the proposed runtime
                 manager is the first approach to combine the two
                 objectives, simultaneously addressing all three thermal
                 aspects. However, determining thread allocation and
                 core frequencies to optimize energy and temperature is
                 an NP-hard problem. This leads to exponential growth in
                 the learning table (significant memory overhead) and a
                 corresponding increase in the exploration time to learn
                 the most appropriate thread allocation and core
                 frequency for a particular application workload. To
                 confine the learning space and to minimize the learning
                 cost, the proposed runtime manager is implemented in a
                 two-stage hierarchy: a heuristic-based thread
                 allocation at a longer time interval to improve thermal
                 cycling, followed by a learning-based hardware
                 frequency selection at a much finer interval to improve
                 average temperature, peak temperature, and energy
                 consumption. This enables finer control on temperature
                 in an energy-efficient manner while simultaneously
                 addressing scalability, which is a crucial aspect for
                 multi-/many-core embedded systems. The proposed
                 hierarchical runtime manager is implemented for Linux
                 running on nVidia's Tegra SoC, featuring four ARM
                 Cortex-A15 cores. Experiments conducted with a range of
                 embedded and cpu-intensive applications demonstrate
                 that the proposed runtime manager not only reduces
                 energy consumption by an average 15\% with respect to
                 Linux but also improves all the thermal aspects---average
                 temperature by 14${}^\circ $C, peak temperature by
                 16${}^\circ $C, and thermal cycling by 54\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gu:2016:RTF,
  author =       "Xiaoqi Gu and Yongxin Zhu and Shengyan Zhou and
                 Chaojun Wang and Meikang Qiu and Guoxing Wang",
  title =        "A Real-Time {FPGA-Based} Accelerator for {ECG}
                 Analysis and Diagnosis Using Association-Rule Mining",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "25:1--25:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2821508",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Telemedicine provides health care services at a
                 distance using information and communication
                 technologies, which intends to be a solution to the
                 challenges faced by current health care systems with
                 growing numbers of population, increased demands from
                 patients, and shortages in human resources. Recent
                 advances in telemedicine, especially in wearable
                 electrocardiogram (ECG) monitors, call for more
                 intelligent and efficient automatic ECG analysis and
                 diagnostic systems. We present a streaming architecture
                 implemented on Field-Programmable Gate Arrays (FPGAs)
                 to accelerate real-time ECG signal analysis and
                 diagnosis in a pipelining and parallel way.
                 Association-rule mining is employed to generate early
                 diagnostic results by matching features of ECG with
                 generated association rules. To improve performance of
                 the processing, we propose a hardware-oriented
                 data-mining algorithm named Bit\_Q\_Apriori. The
                 corresponding hardware implementation indicates a good
                 scalability and outperforms other hardware designs in
                 terms of performance, throughput, and hardware cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Johnson:2016:RTR,
  author =       "Taylor T. Johnson and Stanley Bak and Marco Caccamo
                 and Lui Sha",
  title =        "Real-Time Reachability for Verified Simplex Design",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "26:1--26:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2723871",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The Simplex architecture ensures the safe use of an
                 unverifiable complex/smart controller by using it in
                 conjunction with a verified safety controller and
                 verified supervisory controller (switching logic). This
                 architecture enables the safe use of smart,
                 high-performance, untrusted, and complex control
                 algorithms to enable autonomy without requiring the
                 smart controllers to be formally verified or certified.
                 Simplex incorporates a supervisory controller that will
                 take over control from the unverified complex/smart
                 controller if it misbehaves and use a safety
                 controller. The supervisory controller should (1)
                 guarantee that the system never enters an unsafe state
                 (safety), but should also (2) use the complex/smart
                 controller as much as possible (minimize conservatism).
                 The problem of precisely and correctly defining the
                 switching logic of the supervisory controller has
                 previously been considered either using a
                 control-theoretic optimization approach or through an
                 offline hybrid-systems reachability computation. In
                 this work, we show that a combined online/offline
                 approach that uses aspects of the two earlier methods,
                 along with a real-time reachability computation, also
                 maintains safety, but with significantly less
                 conservatism, allowing the complex controller to be
                 used more frequently. We demonstrate the advantages of
                 this unified approach on a saturated inverted pendulum
                 system, in which the verifiable region of attraction is
                 over twice as large compared to the earlier approach.
                 Additionally, to validate the claims that the real-time
                 reachability approach may be implemented on embedded
                 platforms, we have ported and conducted embedded
                 hardware studies using both ARM processors and Atmel
                 AVR microcontrollers. This is the first ever
                 demonstration of a hybrid-systems reachability
                 computation in real time on actual embedded platforms,
                 which required addressing significant technical
                 challenges.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Baka:2016:NSS,
  author =       "Maria-Iro Baka and Francky Catthoor and Dimitrios
                 Soudris",
  title =        "Near-Static Shading Exploration for Smart Photovoltaic
                 Module Topologies Based on Snake-like Configurations",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "27:1--27:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2837026",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Variable shading due to clouds and nearby objects has
                 a severe impact on the energy yield of photovoltaic
                 installations. Due to the industry's standard of
                 permanently series-connected cells in a photovoltaic
                 (PV) module, partial shading creates mismatches between
                 the Current-Voltage (I-V) characteristics of cells.
                 This article proposes an alternative configurable
                 intramodule cell interconnection topology whereby cell
                 connections can be adapted during operation to allow an
                 optimized power production. The proposed configurable
                 topology outperforms significantly a conventional 10 $
                 \times $ 6 module under heavy shade. Moreover, this is
                 achieved in a quite flexible way and with negligible
                 overhead under uniform irradiation conditions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Malek:2016:RRQ,
  author =       "Alirad Malek and Ioannis Sourdis and Stavros Tzilis
                 and Yifan He and Gerard Rauwerda",
  title =        "{RQNoC}: a Resilient Quality-of-Service
                 Network-on-Chip with Service Redirection",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "28:1--28:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2846097",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we describe RQNoC, a service-oriented
                 Network-on-Chip (NoC) resilient to permanent faults. We
                 characterize the network resources based on the
                 particular service that they support and, when faulty,
                 bypass them, allowing the respective traffic class to
                 be redirected. We propose two alternatives for service
                 redirection, each having different advantages and
                 disadvantages. The first one, Service Detour, uses
                 longer alternative paths through resources of the same
                 service to bypass faulty network parts, keeping traffic
                 classes isolated. The second approach, Service Merge,
                 uses resources of other services providing shorter
                 paths but allowing traffic classes to interfere with
                 each other. The remaining network resources that are
                 common for all services employ additional mechanisms
                 for tolerating faults. Links tolerate faults using
                 additional spare wires combined with a flit-shifting
                 mechanism, and the router control is protected with
                 Triple-Modular-Redundancy (TMR). The proposed RQNoC
                 network designs are implemented in 65nm technology and
                 evaluated in terms of performance, area, power
                 consumption, and fault tolerance. Service Detour
                 requires 9\% more area and consumes 7.3\% more power
                 compared to a baseline network, not tolerant to faults.
                 Its packet latency and throughput is close to the
                 fault-free performance at low-fault densities, but
                 fault tolerance and performance drop substantially for
                 8 or more network faults. Service Merge requires 22\%
                 more area and 27\% more power than the baseline and has
                 a 9\% slower clock. Compared to a fault-free network, a
                 Service Merge RQNoC with up to 32 faults has increased
                 packet latency up to 1.5 to 2.4$ \times $ and reduced
                 throughput to 70\% or 50\%. However, it delivers
                 substantially better fault tolerance, having a mean
                 network connectivity above 90\% even with 32 network
                 faults versus 41\% of a Service Detour network.
                 Combining Service Merge and Service Detour improves fault
                 tolerance, further sustaining a higher number of
                 network faults and reduced packet latency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ienne:2016:GES,
  author =       "Paolo Ienne and Jean-Pierre Talpin",
  title =        "Guest Editorial: Special Issue on Models and
                 Methodologies for System Design",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "29:1--29:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2885503",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Desnos:2016:MRB,
  author =       "Karol Desnos and Maxime Pelcat and Jean-Fran{\c{c}}ois
                 Nezan and Slaheddine Aridhi",
  title =        "On Memory Reuse Between Inputs and Outputs of Dataflow
                 Actors",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "30:1--30:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2871744",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article introduces a new technique to minimize
                 the memory footprints of Digital Signal Processing
                 (DSP) applications specified with Synchronous Dataflow
                 (SDF) graphs and implemented on shared-memory
                 Multiprocessor System-on-Chip (MPSoCs). In addition to
                 the SDF specification, which captures data dependencies
                 between coarse-grained tasks called actors, the
                 proposed technique relies on two optional inputs
                 abstracting the internal data dependencies of actors:
                 annotations of the ports of actors, and script-based
                 specifications of merging opportunities between input
                 and output buffers of actors. Experimental results on a
                 set of applications show a reduction of the memory
                 footprint by 48\% compared to state-of-the-art
                 minimization techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nouri:2016:ARA,
  author =       "Ayoub Nouri and Marius Bozga and Anca Molnos and Axel
                 Legay and Saddek Bensalem",
  title =        "{ASTROLABE}: a Rigorous Approach for System-Level
                 Performance Modeling and Analysis",
  journal =      j-TECS,
  volume =       "15",
  number =       "2",
  pages =        "31:1--31:??",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2885498",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Building abstract system-level models that faithfully
                 capture performance and functional behavior for
                 embedded systems design is challenging. Unlike
                 functional aspects, performance details are rarely
                 available during the early design phases, and no clear
                 method is known to characterize them. Moreover, once
                 such models are built, they are inherently complex as
                 they mix software models, hardware constraints, and
                 environment abstractions. Their analysis by using
                 traditional performance evaluation methods is reaching
                 the limit. In this article, we present a systematic
                 approach for building stochastic abstract performance
                 models using statistical inference and model
                 calibration, and we propose statistical model checking
                 as a scalable performance evaluation technique for
                 them.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Butt:2016:DPH,
  author          = {Shahzad Ahmad Butt and Mehdi Roozmeh and Luciano
                     Lavagno},
  title           = {Designing Parameterizable Hardware {IPs} in a
                     Model-Based Design Environment for High-Level
                     Synthesis},
  journal         = j-TECS,
  volume          = {15},
  number          = {2},
  pages           = {32:1--32:??},
  month           = may,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2871737},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Model-based hardware design allows one to map a single
                     model to multiple hardware and/or software
                     architectures, essentially eliminating one of the major
                     limitations of manual coding in C or RTL. Model-based
                     design for hardware implementation has traditionally
                     offered a limited set of microarchitectures, which are
                     typically suitable only for some application scenarios.
                     In this article we illustrate how digital signal
                     processing (DSP) algorithms can be modeled as flexible
                     intellectual property blocks to be used within the
                     popular Simulink model-based design environment. These
                     blocks are written in C and are designed for both
                     functional simulation and hardware implementation,
                     including architectural design space exploration and
                     hardware implementation through high-level synthesis. A
                     key advantage of our modeling approach is that the very
                     same bit-accurate model is used for simulation and
                     high-level synthesis. To prove the feasibility of our
                     proposed approach, we modeled a fast Fourier transform
                     (FFT) algorithm and synthesized it for different DSP
                     applications with very different performance and cost
                     requirements. We also implemented a
                     high-level-synthesis (HLS) intellectual property (IP)
                     generator that can generate flexible FFT HLS-IP blocks
                     that can be mapped to multiple
                     micro-/macroarchitectures, to enable design space
                     exploration as well as being used for functional
                     simulation in the Simulink environment.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {32},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Le:2016:CBR,
  author          = {Thi Thieu Hoa Le and Roberto Passerone and Uli
                     Fahrenberg and Axel Legay},
  title           = {Contract-Based Requirement Modularization via
                     Synthesis of Correct Decompositions},
  journal         = j-TECS,
  volume          = {15},
  number          = {2},
  pages           = {33:1--33:??},
  month           = may,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2885752},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In distributed development of modern systems,
                     contracts play a vital role in ensuring
                     interoperability of components and adherence to
                     specifications. It is therefore often desirable to
                     verify the satisfaction of an overall property
                     represented as a contract, given the satisfaction of
                     smaller properties also represented as contracts. When
                     the verification result is negative, designers must
                     face the issue of refining the subproperties and
                     components. This is an instance of the classical
                     synthesis problems: ``can we construct a model that
                     satisfies some given specification?'' In this work, we
                     propose two strategies enabling designers to synthesize
                     or refine a set of contracts so that their composition
                     satisfies a given contract. We develop a generic
                     algebraic method and show how it can be applied in
                     different contract models to support top-down
                     component-based development of distributed systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {33},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Rho:2016:GEC,
  author          = {Seungmin Rho and Wenny Rahayu and Geyong Min},
  title           = {Guest Editorial: Challenges of Embedded Systems as
                     They Evolve into {M2M}, {Internet of Things}},
  journal         = j-TECS,
  volume          = {15},
  number          = {2},
  pages           = {34:1--34:??},
  month           = may,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2886417},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {34},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Zeng:2016:SLM,
  author          = {Jing Zeng and Laurence T. Yang and Jianhua Ma},
  title           = {A System-Level Modeling and Design for
                     Cyber-Physical-Social Systems},
  journal         = j-TECS,
  volume          = {15},
  number          = {2},
  pages           = {35:1--35:??},
  month           = may,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2834119},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The design of cyber-physical-social systems (CPSS) is
                     a novel and challenging research field due that it
                     emphasizes the deep fusion of cyberspace, physical
                     space, and social space. In this article, we extend our
                     previously proposed system-level design framework [Zeng
                     et al. 2015] to tailor it to the needs of social
                     scenario of multiple users. A hierarchical Petri
                     net-based model and social flow are presented to extend
                     the control flow and formally describe the social
                     interactions of multiple users, respectively. By using
                     the extended model, the system-level optimization for
                     CPSS can be achieved by the improved design flow.
                     Specifically, object emplacement and user satisfaction
                     are further extended into the social environment. Also
                     maximal power estimation algorithm is improved,
                     leveraging the extended intermediate representation
                     model. Finally, we use a smart office case to
                     demonstrate the feasibility and effectiveness of our
                     improved design approach for multiple users.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {35},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Zhang:2016:IRW,
  author          = {Daqiang Zhang and Jiafu Wan and Zongjian He and
                     Shengjie Zhao and Ke Fan and Sang Oh Park and Zhibin
                     Jiang},
  title           = {Identifying Region-Wide Functions Using Urban Taxicab
                     Trajectories},
  journal         = j-TECS,
  volume          = {15},
  number          = {2},
  pages           = {36:1--36:??},
  month           = may,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2821507},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With the urban development and enlargement, various
                     regions such as residential zones and administrative
                     districts now appear as parts of cities. People exhibit
                     different mobility patterns in each region, which is
                     closely relevant to region-wide functions. In this
                     article, we propose a scheme to discover region-wide
                     functions using large-scale Shanghai taxicab
                     trajectories that capture enormous traces for more than
                     13,000 taxicabs over a period of about 3 years. We
                     investigate these taxicab trajectories and conduct an
                     extensive preliminary study. Then, we divide the city
                     into disjointed regions using Voronoi decomposition. By
                     incorporating people's pick-up and drop-off
                     information, we refine the Voronoi partitioning results
                     to identify region-wide functional areas. Finally, we
                     study people's movement frequency on weekdays and
                     weekends for every kind of urban functional regions. We
                     also look into human mobility within or across the
                     identified urban functional regions. Experimental
                     results show that human movement is bounded with the
                     function of urban regions, and more than 90\% of people
                     visit neighboring (less than 20km travel distance)
                     functional regions with high probability.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {36},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Ji:2016:CLO,
  author          = {Wen Ji and Bo-Wei Chen and Xiangdong Wang and Haiyong
                     Luo and Mucheol Kim and Yiqiang Chen},
  title           = {Cross-Layer Opportunistic Scheduling for
                     Device-to-Device Video Multicast Services},
  journal         = j-TECS,
  volume          = {15},
  number          = {2},
  pages           = {37:1--37:??},
  month           = may,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2856034},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we address the problem of how to make
                     the wireless device-to-device (D2D) video multicast
                     systems have better quality provision with
                     consideration of internet-of-things (IoT) applications.
                     We propose an opportunistic transmission and fair
                     resource allocation framework, including joint
                     application-layer and physical-layer transmission and
                     optimization. First, we use a parallel subchannels
                     structure by concatenating the Fountain codes and
                     diversity-embedded space-time block codes to provide
                     reliable and flexible transmission in heterogeneous
                     circumstances. Second, we exploit the quality of
                     heterogeneous user experience (quality of experience)
                     metric under D2D video multicast systems, with
                     consideration of various channel states, device
                     capability, video content urgency, and the number of
                     demanding users. Third, we formulate reliable multiple
                     video streams broadcasting to heterogeneous devices as
                     an aggregate maximum utility achieving problem, and we
                     use opportunistic scheduling to select suitable users
                     in each transmission interval to improve the
                     broadcasting utility. Fourth, we use the utility fair
                     scheme to guide rate allocation among multicontent
                     video multicast. Extensive performance comparison and
                     analysis are presented to demonstrate efficiency of the
                     proposed solution.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {37},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Liu:2016:SMA,
  author          = {Lu Liu and Nick Antonopoulos and Minghui Zheng and
                     Yongzhao Zhan and Zhijun Ding},
  title           = {A Socioecological Model for Advanced Service Discovery
                     in Machine-to-Machine Communication Networks},
  journal         = j-TECS,
  volume          = {15},
  number          = {2},
  pages           = {38:1--38:??},
  month           = may,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2811264},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The new development of embedded systems has the
                     potential to revolutionize our lives and will have a
                     significant impact on future Internet of Thing (IoT)
                     systems if required services can be automatically
                     discovered and accessed at runtime in
                     Machine-to-Machine (M2M) communication networks. It is
                     a crucial task for devices to perform timely service
                     discovery in a dynamic environment of IoTs. In this
                     article, we propose a Socioecological Service Discovery
                     (SESD) model for advanced service discovery in M2M
                     communication networks. In the SESD network, each
                     device can perform advanced service search to
                     dynamically resolve complex enquires and autonomously
                     support and co-operate with each other to quickly
                     discover and self-configure any services available in
                     M2M communication networks to deliver a real-time
                     capability. The proposed model has been systematically
                     evaluated and simulated in a dynamic M2M environment.
                     The experiment results show that SESD can self-adapt
                     and self-organize themselves in real time to generate
                     higher flexibility and adaptability and achieve a
                     better performance than the existing methods in terms
                     of the number of discovered service and a better
                     efficiency in terms of the number of discovered
                     services per message.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {38},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Ahmad:2016:EMB,
  author          = {Awais Ahmad and Anand Paul and Mazhar Rathore and
                     Hangbae Chang},
  title           = {An Efficient Multidimensional Big Data Fusion Approach
                     in Machine-to-Machine Communication},
  journal         = j-TECS,
  volume          = {15},
  number          = {2},
  pages           = {39:1--39:??},
  month           = may,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2834118},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Machine-to-Machine communication (M2M) is nowadays
                     increasingly becoming a world-wide network of
                     interconnected devices uniquely addressable, via
                     standard communication protocols. The prevalence of M2M
                     is bound to generate a massive volume of heterogeneous,
                     multisource, dynamic, and sparse data, which leads a
                     system towards major computational challenges, such as,
                     analysis, aggregation, and storage. Moreover, a
                     critical problem arises to extract the useful
                     information in an efficient manner from the massive
                     volume of data. Hence, to govern an adequate quality of
                     the analysis, diverse and capacious data needs to be
                     aggregated and fused. Therefore, it is imperative to
                     enhance the computational efficiency for fusing and
                     analyzing the massive volume of data. Therefore, to
                     address these issues, this article proposes an
                     efficient, multidimensional, big data analytical
                     architecture based on the fusion model. The basic
                     concept implicates the division of magnitudes
                     (attributes), i.e., big datasets with complex
                     magnitudes can be altered into smaller data subsets
                     using five levels of the fusion model that can be
                     easily processed by the Hadoop Processing Server,
                     resulting in formalizing the problem of feature
                     extraction applications using earth observatory system,
                     social networking, or networking applications.
                     Moreover, a four-layered network architecture is also
                     proposed that fulfills the basic requirements of the
                     analytical architecture. The feasibility and efficiency
                     of the proposed algorithms used in the fusion model are
                     implemented on Hadoop single-node setup on UBUNTU 14.04
                     LTS core i5 machine with 3.2GHz processor and 4GB
                     memory. The results show that the proposed system
                     architecture efficiently extracts various features
                     (such as land and sea) from the massive volume of
                     satellite data.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {39},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Kim:2016:UMA,
  author          = {Eui-Jik Kim and Jung-Hyok Kwon and Ken Choi and
                     Taeshik Shon},
  title           = {Unified Medium Access Control Architecture for
                     Resource-Constrained Machine-to-Machine Devices},
  journal         = j-TECS,
  volume          = {15},
  number          = {2},
  pages           = {40:1--40:??},
  month           = may,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2876958},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Wed Jun 8 09:43:30 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In capillary machine-to-machine (M2M) communications,
                     which is being considered as a feasible network
                     solution for M2M applications, because of physical
                     resource constraints and deployment conditions, an
                     energy-efficient and scalable medium access control
                     (MAC) protocol is crucial for numerous M2M devices to
                     concurrently access wireless channels. Therefore, this
                     paper presents a unified MAC layer architecture for
                     resource-constrained M2M devices in capillary M2M
                     networks [named as resource-constrained MAC
                     architecture (RCMA)], which has a unified (monolithic)
                     framework consisting of essential functional components
                     to support MAC-related operations of M2M devices:
                     multi-channel hybrid MAC (McHM), logical link control
                     (LLC), time synchronizer (TS), and device on--off
                     scheduler (DO2S). McHM provides a baseline MAC protocol
                     for an entire capillary M2M system that combines the
                     benefit of both contention-based carrier sense multiple
                     access and schedule-based time division multiple access
                     schemes, whereas the other three components help in the
                     McHM operations. To demonstrate the effectiveness of
                     the RCMA, we implement the whole stack using the
                     QualNet simulator. Experimental results show that the
                     RCMA outperforms the conventional ZigBee stack in terms
                     of energy efficiency and scalability, even under heavy
                     traffic conditions.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {40},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Franchino:2016:BOE,
  author          = {Gianluca Franchino and Giorgio Buttazzo and Mauro
                     Marinoni},
  title           = {Bandwidth Optimization and Energy Management in
                     Real-Time Wireless Networks},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {41:1--41:??},
  month           = jul,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2851498},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In embedded systems operated by battery and
                     interacting with the environment, a fundamental issue
                     is the enforcement of real-time and energy constraints
                     to guarantee a desired lifetime with a given
                     performance. A lot of research has focused on energy
                     management at the communication level; however, not
                     many authors considered both real-time and energy
                     requirements in wireless communication systems. This
                     article proposes El-SMan, a power-aware framework
                     working in combination with MAC layer communication
                     protocols for maximizing battery lifetime in wireless
                     networks of embedded systems with real-time
                     constraints. Exploiting the flexibility in bandwidth
                     requirements, El-SMan adapts stream parameters to
                     balance performance versus energy consumption, taking
                     both lifetime and message deadlines into account.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {41},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

%%% NOTE: this editorial's articleno is 41e, so the pages prefix is 41e
%%% as well; it must not share the page range 41:1--41:?? with the
%%% regular article 41 (Franchino:2016:BOE) of the same issue.
@Article{Shukla:2016:EFI,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Fence Itself Grazing the Field --- Security
                 from the Sentries",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "41e:1--41e:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2953045",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Wang:2016:UUP,
  author          = {Yichuan Wang and Xin Liu and Cheng-Hsin Hsu},
  title           = {{UPDATE}: {User-Profile-Driven Adaptive TransfEr} for
                     Mobile Devices},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {42:1--42:??},
  month           = jul,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2889489},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Existing channel-aware scheduling work has mainly
                     focused on scheduling in small timescales, that is,
                     tens to hundreds of seconds. We propose to use
                     long-term user profiles to provide useful statistical
                     information on future network conditions in large
                     timescales. We design scheduling algorithms based on
                     Markov decision theory. We collect and use a large set
                     of real-life traces from the general public. Extensive
                     trace-driven evaluations show that many real mobile
                     users can benefit from our framework. In addition, we
                     compare our framework against state-of-the-art
                     algorithms and observe significant performance
                     differences because the existing algorithms were not
                     designed for the large timescale scenario.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {42},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Sotiriou-Xanthopoulos:2016:IEV,
  author          = {Efstathios Sotiriou-Xanthopoulos and Sotirios Xydis
                     and Kostas Siozios and George Economakos and Dimitrios
                     Soudris},
  title           = {An Integrated Exploration and Virtual Platform
                     Framework for Many-Accelerator Heterogeneous Systems},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {43:1--43:??},
  month           = jul,
  year            = {2016},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2866578},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The recent advent of many-accelerator systems-on-chip
                     (SoC), driven by the need for maximizing throughput and
                     power efficiency, has led to an exponential increase in
                     the hardware/software co-design complexity. The reason
                     of this increase is that the designer has to explore a
                     vast number of architectural parameter combinations for
                     each single accelerator, as well as inter-accelerator
                     configuration combinations under specific area,
                     throughput, and power constraints, given that each
                     accelerator has different computational requirements.
                     In such a case, the design space size explodes. Thus,
                     existing design space exploration (DSE) techniques give
                     poor-quality solutions, as the design space cannot be
                     adequately covered in a fair time. This problem is
                     aggravated by the very long simulation time of the
                     many-accelerator virtual platforms (VPs). This article
                     addresses these design issues by (a) presenting a
                     virtual prototyping solution that decreases the
                     exploration time by enabling the evaluation of multiple
                     configurations per VP simulation and (b) proposing a
                     DSE methodology that efficiently explores the design
                     space of many-accelerator systems. With the use of two
                     fully developed use cases, namely an H.264 decoding
                     server for multiple video streams and a parallelized
                     denoising system for MRI scans, we show that the
                     proposed DSE methodology either leads to Pareto points
                     that dominate over those of a typical DSE scenario or
                     finds new solutions that might not be found by the
                     typical DSE. In addition, the proposed virtual
                     prototyping solution leads to DSE runtime reduction
                     reaching 10 $ \times $ for H.264 and 5 $ \times $ for
                     Rician denoise.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {43},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

%%% Title note: {Chambolle} is a proper name, so it is brace-protected (like
%%% {FPGA}) to keep its capital under styles that sentence-case titles.
@Article{Beretta:2016:PCA,
  author =       "Ivan Beretta and Vincenzo Rana and Abdulkadir Akin and
                 Alessandro Antonio Nacci and Donatella Sciuto and David
                 Atienza",
  title =        "Parallelizing the {Chambolle} Algorithm for
                 Performance-Optimized Mapping on {FPGA} Devices",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2851497",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The performance and the efficiency of recent computing
                 platforms have been deeply influenced by the widespread
                 adoption of hardware accelerators, such as graphics
                 processing units (GPUs) or field-programmable gate
                 arrays (FPGAs), which are often employed to support the
                 tasks of general-purpose processors (GPPs). One of the
                 main advantages of these accelerators over their
                 sequential counterparts (GPPs) is their ability to
                 perform massive parallel computation. However, to
                 exploit this competitive edge, it is necessary to
                 extract the parallelism from the target algorithm to be
                 executed, which generally is a very challenging task.
                 This concept is demonstrated, for instance, by the poor
                 performance achieved on relevant multimedia algorithms,
                 such as Chambolle, which is a well-known algorithm
                 employed for the optical flow estimation. The
                 implementations of this algorithm that can be found in
                 the state of the art are generally based on GPUs but
                 barely improve the performance that can be obtained
                 with a powerful GPP. In this article, we propose a
                 novel approach to extract the parallelism from
                 computation-intensive multimedia algorithms, which
                 includes an analysis of their dependency schema and an
                 assessment of their data reuse. We then perform a
                 thorough analysis of the Chambolle algorithm, providing
                 a formal proof of its inner data dependencies and
                 locality properties. Then, we exploit the
                 considerations drawn from this analysis by proposing an
                 architectural template that takes advantage of the
                 fine-grained parallelism of FPGA devices. Moreover,
                 since the proposed template can be instantiated with
                 different parameters, we also propose a design metric,
                 the expansion rate, to help the designer in the
                 estimation of the efficiency and performance of the
                 different instances, making it possible to select the
                 right one before the implementation phase. We finally
                 show, by means of experimental results, how the
                 proposed analysis and parallelization approach leads to
                 the design of efficient and high-performance FPGA-based
                 implementations that are orders of magnitude faster
                 than the state-of-the-art ones.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Volume 15, number 3 (July 2016), article 45.
@Article{Nagar:2016:FPW,
  author          = {Kartik Nagar and Y. N. Srikant},
  title           = {Fast and Precise Worst-Case Interference Placement for
                     Shared Cache Analysis},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {45:1--45:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2854151},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Real-time systems require a safe and precise estimate
                     of the worst-case execution time (WCET) of programs. In
                     multicore architectures, the precision of a program's
                     WCET estimate highly depends on the precision of its
                     predicted shared cache behavior. Prediction of shared
                     cache behavior is difficult due to the uncertain timing
                     of interfering shared cache accesses made by programs
                     running on other cores. Given the assignment of
                     programs to cores, the worst-case interference
                     placement (WCIP) technique tries to find the worst-case
                     timing of interfering accesses, which would cause the
                     maximum number of cache misses on the worst case path
                     of the program, to determine its WCET. Although WCIP
                     generates highly precise WCET estimates, the current
                     ILP-based approach is also known to have very high
                     analysis time. In this work, we investigate the WCIP
                     problem in detail and determine its source of hardness.
                     We show that performing WCIP is an NP-hard problem by
                     reducing the 0-1 knapsack problem. We use this
                     observation to make simplifying assumptions, which make
                     the WCIP problem tractable, and we propose an
                     approximate greedy technique for WCIP, whose time
                     complexity is linear in the size of the program. We
                     perform extensive experiments to show that the
                     assumptions do not affect the precision of WCIP but
                     result in significant reduction of analysis time.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {45},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 46.
@Article{Starke:2016:EDV,
  author          = {Renan Augusto Starke and Andreu Carminati and
                     R{\^o}mulo {Silva De Oliveira}},
  title           = {Evaluating the Design of a {VLIW} Processor for
                     Real-Time Systems},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {46:1--46:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2889490},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Nowadays, many real-time applications are very complex
                     and as the complexity and the requirements of those
                     systems become more demanding, more hardware processing
                     capacity is necessary. Unfortunately, the correct
                     functioning of real-time systems depends not only on
                     the logically correct response but also on the time
                     when it is produced. General-purpose processor design
                     fails to deliver analyzability due to their
                     nondeterministic behavior caused by the use of cache
                     memories, dynamic branch prediction, speculative
                     execution, and out-of-order pipelines. In this article,
                     we investigate the pipeline performance of Very Long
                     Instruction Word (VLIW) architectures for real-time
                     systems with an in-order pipeline considering
                     Worst-Case Execution Time (WCET) performance.
                     Techniques on obtaining the WCET of VLIW machines are
                     also considered and we make a quantification on how
                     important are hardware techniques such as static branch
                     prediction, predication, and pipeline speed of complex
                     operations such as memory access and multiplication for
                     high-performance real-time systems. The memory
                     hierarchy is out of the scope of this article and we
                     used a classic deterministic structure formed by a
                     direct mapped instruction cache and a data scratchpad
                     memory. A VLIW prototype was implemented in VHDL from
                     scratch considering the HP VLIW ST231 ISA. We also show
                     some compiler insights and we use a representative
                     subset of the M{\"a}lardalen's WCET benchmarks for
                     validation and performance quantification.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {46},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 47.
@Article{Kim:2016:SMR,
  author          = {Sang-Hoon Kim and Jinkyu Jeong and Jin-Soo Kim and
                     Seungryoul Maeng},
  title           = {{SmartLMK}: a Memory Reclamation Scheme for Improving
                     User-Perceived App Launch Time},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {47:1--47:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2894755},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {As the mobile computing environment evolves, users
                     demand high-quality apps and better user experience.
                     Consequently, memory demand in mobile devices has
                     soared. Device manufacturers have fulfilled the demand
                     by equipping devices with more RAM. However, such a
                     hardware approach is only a temporary solution and does
                     not scale well in the resource-constrained mobile
                     environment. Meanwhile, mobile systems adopt a new app
                     life cycle and a memory reclamation scheme tailored for
                     the life cycle. When a user leaves an app, the app is
                     not terminated but cached in memory as long as there is
                     enough free memory. If the free memory gets low, a
                     victim app is terminated and the associated memory to
                     the app is reclaimed. This process-level approach has
                     worked well in the mobile environment. However, user
                     experience can be impaired severely because the victim
                     selection policy does not consider the user experience.
                     In this article, we propose a novel memory reclamation
                     scheme called SmartLMK. SmartLMK minimizes the impact
                     of the process-level reclamation on user experience.
                     The worthiness to keep an app in memory is modeled by
                     means of user-perceived app launch time and app usage
                     statistics. The memory footprint and impending memory
                     demand are estimated from the history of the memory
                     usage. Using these values and memory models, SmartLMK
                     picks up the least valuable apps and terminates them at
                     once. Our evaluation on a real Android-based smartphone
                     shows that SmartLMK efficiently distinguishes the
                     valuable apps among cached apps and keeps those
                     valuable apps in memory. As a result, the
                     user-perceived app launch time can be improved by up to
                     13.2\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {47},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 48.
@Article{Kim:2016:APA,
  author          = {Dongwon Kim and Yohan Chon and Wonwoo Jung and Yungeun
                     Kim and Hojung Cha},
  title           = {Accurate Prediction of Available Battery Time for
                     Mobile Applications},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {48:1--48:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2875423},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Energy consumption in mobile devices is an important
                     issue for both system developers and users. Users are
                     aware of the battery-related information of their
                     mobile devices and tend to take appropriate actions to
                     increase the battery life. In this article, we propose
                     a framework that accurately estimates the remaining
                     battery time of applications at runtime. The framework
                     profiles the power behavior of applications tied with
                     activated hardware components and estimates the
                     remaining battery budget utilizing the battery-related
                     data provided by the device. The experiments validate
                     that our method predicts the remaining battery time for
                     applications with approximately 93\% of accuracy.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {48},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 49.
@Article{Ahmed:2016:NSC,
  author          = {Rehan Ahmed and Parameswaran Ramanathan and Kewal K.
                     Saluja},
  title           = {Necessary and Sufficient Conditions for Thermal
                     Schedulability of Periodic Real-Time Tasks Under Fluid
                     Scheduling Model},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {49:1--49:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2883612},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With the growing need to address the thermal issues in
                     modern processing platforms, various performance
                     throttling schemes have been proposed in literature
                     (DVFS, clock gating, and so on) to manage temperature.
                     In real-time systems, such methods are often
                     unacceptable, as they can result in potentially
                     catastrophic deadline misses. As a result, real-time
                     scheduling research has recently focused on developing
                     algorithms that meet the compute deadline while
                     satisfying power and thermal constraints. Basic bounds
                     that can determine if a set of tasks can be scheduled
                     or not were established in the 1970s based on
                     computation utilization. Similar results for thermal
                     bounds have not been forthcoming. In this article, we
                     address the problem of thermal constraint
                     schedulability of tasks and derive necessary and
                     sufficient conditions for thermal feasibility of
                     periodic tasksets on a unicore system. We prove that a
                     GPS-inspired fluid scheduling scheme is thermally
                     optimal when context switch/preemption overhead is
                     ignored. Extension of sufficient conditions to a
                     nonfluid model is still an open problem. We also extend
                     some of the results to a multicore processing
                     environment. We demonstrate the efficacy of our results
                     through extensive simulations. We also evaluate the
                     proposed concepts on a hardware testbed.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {49},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 50.
@Article{Li:2016:USS,
  author          = {Fang Li and Jiafu Wan and Ping Zhang and Di Li and
                     Daqiang Zhang and Keliang Zhou},
  title           = {Usage-Specific Semantic Integration for Cyber-Physical
                     Robot Systems},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {50:1--50:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2873057},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The multidisciplinary nature and time criticality of
                     computing in Cyber-Physical Robot Systems (CPRS) makes
                     it significantly different from traditional computer
                     systems. This article attempts to create a
                     usage-specific language called Cyber-Physical Robot
                     Language (CPRL), which supports the CPRS design and
                     implementation in an integrative and swift way.
                     Multiview description and integration strategies as
                     well as formal execution semantics for usage-specific
                     simulation and verification are outlined. A graphic
                     unified environment for CPRS modeling is supplied, in
                     which several tools are integrated. A 6-DOF distributed
                     robot system development in the environment is
                     presented. The approach is an attempt to support CPRS
                     design in an effective way, at the same time
                     guaranteeing the system function and performance
                     requirements.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {50},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 51.
@Article{An:2016:MBD,
  author          = {Xin An and Eric Rutten and Jean-Philippe Diguet and
                     Abdoulaye Gamati{\'e}},
  title           = {Model-Based Design of Correct Controllers for
                     Dynamically Reconfigurable Architectures},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {51:1--51:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2873056},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Dynamically reconfigurable hardware has been
                     identified as a promising solution for the design of
                     energy-efficient embedded systems. However, its
                     adoption is limited by costly design effort, including
                     verification and validation, which is even more complex
                     than for nondynamically reconfigurable systems. In this
                     article, we propose a tool-supported formal method to
                     automatically design a correct-by-construction control
                     of the reconfiguration. By representing system
                     behaviors with automata, we exploit automated
                     algorithms to synthesize controllers that safely
                     enforce reconfiguration strategies formulated as
                     properties to be satisfied by control. We design
                     generic modeling patterns for a class of reconfigurable
                     architectures, taking into account both hardware
                     architecture and applications, as well as relevant
                     control objectives. We validate our approach on two
                     case studies implemented on FPGAs.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {51},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 52.
@Article{Hilal:2016:CEA,
  author          = {Allaa R. Hilal and Otman Basir},
  title           = {A Collaborative Energy-Aware Sensor Management System
                     Using Team Theory},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {52:1--52:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2910574},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {With limited battery supply, power is a scarce
                     commodity in wireless sensor networks. Thus, to prolong
                     the lifetime of the network, it is imperative that the
                     sensor resources are managed effectively. This task is
                     particularly challenging in heterogeneous sensor
                     networks for which decisions and compromises regarding
                     sensing strategies are to be made under time and
                     resource constraints. In such networks, a sensor has to
                     reason about its current state to take actions that are
                     deemed appropriate with respect to its mission, its
                     energy reserve, and the survivability of the overall
                     network. Sensor Management controls and coordinates the
                     use of the sensory suites in a manner that maximizes
                     the success rate of the system in achieving its
                     missions. This article focuses on formulating and
                     developing an autonomous energy-aware sensor management
                     system that strives to achieve network objectives while
                     maximizing its lifetime. A team-theoretic formulation
                     based on the Belief-Desire-Intention (BDI) model and
                     the Joint Intention theory is proposed as a mechanism
                     for effective and energy-aware collaborative
                     decision-making. The proposed system models the
                     collective behavior of the sensor nodes using the Joint
                     Intention theory to enhance sensors' collaboration and
                     success rate. Moreover, the BDI modeling of the sensor
                     operation and reasoning allows a sensor node to adapt
                     to the environment dynamics, situation-criticality
                     level, and availability of its own resources. The
                     simulation scenario selected in this work is the
                     surveillance of the Waterloo International Airport.
                     Various experiments are conducted to investigate the
                     effect of varying the network size, number of threats,
                     threat agility, environment dynamism, as well as
                     tracking quality and energy consumption, on the
                     performance of the proposed system. The experimental
                     results demonstrate the merits of the proposed approach
                     compared to the state-of-the-art centralized approach
                     adapted from Atia et al. [2011] and the localized
                     approach in Hilal and Basir [2015] in terms of energy
                     consumption, adaptability, and network lifetime. The
                     results show that the proposed approach has 12 $ \times
                     $ less energy consumption than that of the popular
                     centralized approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {52},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 53.
@Article{Ungerer:2016:PIH,
  author          = {Theo Ungerer and Christian Bradatsch and Martin Frieb
                     and Florian Kluge and J{\"o}rg Mische and Alexander
                     Stegmeier and Ralf Jahr and Mike Gerdes and Pavel
                     Zaykov and Lucie Matusova and Zai Jian Jia Li and
                     Zlatko Petrov and Bert B{\"o}ddeker and Sebastian Kehr
                     and Hans Regler and Andreas Hugl and Christine Rochange
                     and Haluk Ozaktas and Hugues Cass{\'e} and Armelle
                     Bonenfant and Pascal Sainrat and Nick Lay and David
                     George and Ian Broster and Eduardo Qui{\~n}ones and
                     Milos Panic and Jaume Abella and Carles Hernandez and
                     Francisco Cazorla and Sascha Uhrig and Mathias Rohde
                     and Arthur Pyka},
  title           = {Parallelizing Industrial Hard Real-Time Applications
                     for the {parMERASA} Multicore},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {53:1--53:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2910589},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The EC project parMERASA (Multicore Execution of
                     Parallelized Hard Real-Time Applications Supporting
                     Analyzability) investigated timing-analyzable parallel
                     hard real-time applications running on a predictable
                     multicore processor. A pattern-supported
                     parallelization approach was developed to ease
                     sequential to parallel program transformation based on
                     parallel design patterns that are timing analyzable.
                     The parallelization approach was applied to parallelize
                     the following industrial hard real-time programs: 3D
                     path planning and stereo navigation algorithms
                     (Honeywell International s.r.o.), control algorithm for
                     a dynamic compaction machine (BAUER Maschinen GmbH),
                     and a diesel engine management system (DENSO AUTOMOTIVE
                     Deutschland GmbH). This article focuses on the
                     parallelization approach, experiences during
                     parallelization with the applications, and quantitative
                     results reached by simulation, by static WCET analysis
                     with the OTAWA tool, and by measurement-based WCET
                     analysis with the RapiTime tool.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {53},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 54.
@Article{Tanasa:2016:CAP,
  author          = {Bogdan Tanasa and Unmesh D. Bordoloi and Petru Eles
                     and Zebo Peng},
  title           = {Correlation-Aware Probabilistic Timing Analysis for
                     the Dynamic Segment of {FlexRay}},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {54:1--54:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2870635},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We propose an analytical framework for probabilistic
                     timing analysis of the event-triggered Dynamic segment
                     of the FlexRay communication protocol. Specifically,
                     our framework computes the Deadline Miss Ratio of each
                     message. The core problem is formulated as a Mixed
                     Integer Linear Program (MILP). Given the intractability
                     of the problem, we also propose several techniques that
                     help to mitigate the running times of our tool. This
                     includes the re-engineering of the problem to run it on
                     GPUs as well as reformulating the MILP itself. Most
                     importantly, we also show how our framework can handle
                     correlations between the queuing events of messages.
                     This is challenging because one cannot apply the
                     convolution operator in the same way as in the case of
                     independent queuing events.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {54},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

%%% Volume 15, number 3 (July 2016), article 55.
@Article{Yang:2016:BAU,
  author          = {Ming-Chang Yang and Yuan-Hao Chang and Che-Wei Tsao},
  title           = {Byte-Addressable Update Scheme to Minimize the Energy
                     Consumption of {PCM}-Based Storage Systems},
  journal         = j-TECS,
  volume          = {15},
  number          = {3},
  pages           = {55:1--55:??},
  month           = jul,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2910590},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jul 21 17:18:13 MDT 2016},
  bibsource       = {http://portal.acm.org/;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In recent years, phase-change memory (PCM) has
                     generated a great deal of interest because of its byte
                     addressability and nonvolatility properties. It is
                     regarded as a good alternative storage medium that can
                     reduce the performance gap between the main memory and
                     the secondary storage in computing systems. However,
                     its high energy consumption on writes is a challenging
                     issue in the design of battery-powered mobile computing
                     systems. To reduce the energy consumption, we exploit
                     the byte addressability and the asymmetric read-write
                     energy/latency of PCM in an energy-efficient update
                     scheme for journaling file systems. We also introduce a
                     concept called the 50\% rule to determine/recommend the
                     best update strategy for block updates. The proposed
                     scheme only writes modified data, instead of the whole
                     updated block, to PCM-based storage devices without
                     extra hardware support. Moreover, it guarantees the
                     sanity/integrity of file systems even if the computing
                     system crashes or there is a power failure during the
                     data update process. We implemented the proposed scheme
                     on the Linux system and conducted a series of
                     experiments to evaluate the scheme. The results are
                     very encouraging.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {55},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Hu:2016:EIR,
  author =       "Biao Hu and Kai Huang and Gang Chen and Long Cheng and
                 Alois Knoll",
  title =        "Evaluation and Improvements of Runtime Monitoring
                 Methods for Real-Time Event Streams",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "56:1--56:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2890503",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Runtime monitoring is of great importance as a
                 safeguard to guarantee the correctness of system
                 runtime behaviors. Two state-of-the-art methods,
                 dynamic counters and $l$-repetitive function, were
                 recently developed to tackle the runtime monitoring for
                 real-time systems. While both are reported to be
                 efficient in monitoring arbitrary events, the
                 monitoring performance between them has not yet been
                 evaluated. This article evaluates both methods in
                 depth, to identify their strengths and weaknesses. New
                 methods are proposed to efficiently monitor the
                 many-to-one connections that are abstracted as AND and
                 OR components on multiple inputs. Representative
                 scenarios are used as our case studies to
                 quantitatively demonstrate the evaluations. Both
                 methods are implemented in hardware FPGA. The timing
                 overhead and resource usages of implementing the two
                 methods are evaluated.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lu:2016:VCV,
  author =       "Yaojie Lu and Seyedamin Rooholamin and Sotirios G.
                 Ziavras",
  title =        "Vector Coprocessor Virtualization for Simultaneous
                 Multithreading",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "57:1--57:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2898364",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "Vector coprocessors (VPs), commonly being assigned
                 exclusively to a single thread/core, are not often
                 performance and energy efficient due to mismatches with
                 the vector needs of individual applications. We present
                 in this article an easy-to-implement VP virtualization
                 technique that, when applied, enables a multithreaded
                 VP to simultaneously execute multiple threads of
                 similar or arbitrary vector lengths to achieve improved
                 aggregate utilization. With a vector register file
                 (VRF) virtualization technique invented to dynamically
                 allocate physical vector registers to threads, our VP
                 virtualization approach improves programmer
                 productivity by providing at runtime a distinct
                 physical register name space to each competing thread,
                 thus eliminating the need to solve register-name
                 conflicts statically. We applied our virtualization
                 technique to a multithreaded VP and prototyped an
                 FPGA-based multicore processor system that supports VP
                 sharing as well as power gating for better energy
                 efficiency. Under the dynamic creation of disparate
                 threads, our benchmarking results show impressive VP
                 speedups of up to 333\% and total energy savings of up
                 to 37\% with proper thread scheduling and power gating
                 compared to a similar-sized system that allows VP
                 access to just one thread at a time.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seo:2016:HMR,
  author =       "Hwajeong Seo and Zhe Liu and Yasuyuki Nogami and
                 Jongseok Choi and Howon Kim",
  title =        "Hybrid {Montgomery} Reduction",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "58:1--58:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2890502",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present a hybrid method to improve
                 the performance of the Montgomery reduction by taking
                 advantage of the Karatsuba technique. We divide the
                 Montgomery reduction into two sub-parts, including one
                 for the conventional Montgomery reduction and the other
                 one for Karatsuba-aided multiplication. This approach
                 reduces the multiplication complexity of $n$-limb
                 Montgomery reduction from $ \theta (n^2 + n)$ to
                 asymptotic complexity $ \theta (7 n^2 / 8 + n)$. Our
                 practical implementation results over an 8-bit
                 microcontroller also show performance enhancements by
                 11\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Filippopoulos:2016:IEM,
  author =       "Iason Filippopoulos and Namita Sharma and Francky
                 Catthoor and Per Gunnar Kjeldsberg and Preeti Ranjan
                 Panda",
  title =        "Integrated Exploration Methodology for Data
                 Interleaving and Data-to-Memory Mapping on {SIMD}
                 Architectures",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "59:1--59:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2894754",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This work presents a methodology for efficient
                 exploration of data interleaving and data-to-memory
                 mapping options for Single Instruction Multiple Data
                 (SIMD) platform architectures. The system architecture
                 consists of a reconfigurable clustered scratch-pad
                 memory and a SIMD functional unit, which performs the
                 same operation on multiple input data in parallel. The
                 memory accesses contribute substantially to the overall
                 energy consumption of an embedded system executing a
                 data intensive task. The scope of this work is the
                 reduction of the overall energy consumption by
                 increasing the utilization of the functional units and
                 decreasing the number of memory accesses. The presented
                 methodology is tested using a number of benchmark
                 applications with holes in their access scheme.
                 Potential gains are calculated based on the energy
                 models, both for the processing and the memory part of
                 the system. The reduction in energy consumption after
                 efficient interleaving and mapping of data is between
                 40\% and 80\% for the complete system and the studied
                 benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ferreira:2016:LRF,
  author =       "Ronaldo R. Ferreira and Gabriel L. Nazar and Jean {Da
                 Rolt} and {\'A}lvaro F. Moreira and Luigi Carro",
  title =        "Live-Out Register Fencing: Interrupt-Triggered Soft
                 Error Correction Based on the Elimination of
                 Register-to-Register Communication",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "60:1--60:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2873058",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article introduces Live-Out Register Fencing
                 (LoRF), a soft error correction mechanism that uses the
                 novel Spill Register File as a container of
                 checkpointing data. LoRF's Spill Register File holds
                 the values shared among basic blocks in the program,
                 and, coupled with a new compilation strategy, LoRF
                 allows for error correction in the same basic block
                 where the error was detected. In LoRF, error correction
                 is triggered by a hardware interrupt that restores the
                 registers of a basic block from the Spill Register
                 File. After these registers are restored, the basic
                 block where the error was detected can just be
                 re-executed, thus reducing the costs of error recovery.
                 LoRF's error correction policy eliminates the need for
                 expensive architectural support for checkpointing and
                 rollback, reducing the performance overhead of online
                 soft error correction. LoRF relies on both a modified
                 processor architecture and a corresponding compiler.
                 The architecture was implemented in synthesizable VHDL,
                 whereas the compiler was developed as an extension of
                 the LLVM framework. Fault injection experiments support
                 an error correction coverage of 99.35\% and a mean
                 performance overhead of 1.33 for the entire life cycle
                 of an error from its occurrence to its elimination from
                 the system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "60",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Khalid:2016:RHL,
  author =       "Ayesha Khalid and Goutam Paul and Anupam Chattopadhyay
                 and Faezeh Abediostad and Syed Imad Ud Din and Muhammad
                 Hassan and Baishik Biswas and Prasanna Ravi",
  title =        "{RunStream}: a High-Level Rapid Prototyping Framework
                 for Stream Ciphers",
  journal =      j-TECS,
  volume =       "15",
  number =       "3",
  pages =        "61:1--61:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2891412",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 21 17:18:13 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present RunStream, a rapid prototyping framework
                 for realizing stream cipher implementations based on
                 algorithmic specifications and architectural
                 customizations desired by the users. In the dynamic
                 world of cryptography where newer recommendations are
                 frequently proposed, the need of such tools is
                 imperative. It carries out design validation and
                 generates an optimized software implementation and a
                 synthesizable Register Transfer Level Verilog
                 description. Our framework enables speedy benchmarking
                 against critical resources like area, throughput,
                 power, and latency and allows exploration of
                 alternatives. Using RunStream, we successfully
                 implemented various stream ciphers and benchmarked the
                 quality of results to be at par with published
                 hand-optimized implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2016:ESE,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Security of Embedded Systems and Cyber
                 Irons --- Embedded Systems for Security",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "62:1--62:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2976731",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "62",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Eles:2016:GES,
  author =       "Petru Eles and Rolf Ernst",
  title =        "Guest Editorial for Special Issue of {ESWEEK 2015}",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "63:1--63:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2968218",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "63",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{You:2016:VVA,
  author =       "Yi-Ping You and Szu-Chien Chen",
  title =        "{VecRA}: a Vector-Aware Register Allocator for {GPU}
                 Shader Processors",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "64:1--64:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2961026",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Graphics processing units (GPUs) are now widely used
                 in embedded systems for manipulating computer graphics
                 and even for general-purpose computation. However, many
                 embedded systems have to manage highly restricted
                 hardware resources in order to achieve high performance
                 or energy efficiency. The number of registers is one of
                 the common limiting factors in an embedded GPU design.
                 Programs that run with a low number of registers may
                 suffer from high register pressure if register
                 allocation is not properly designed, especially on a
                 GPU in which a register is divided into four elements
                 and each element can be accessed separately, because
                 allocating a register for a vector-type variable that
                 does not contain values in all elements wastes register
                 spaces. In this article, we present a vector-aware
                 register allocation framework to improve register
                 utilization on shader architectures. The framework
                 involves two major components: (1) element-based
                 register allocation that allocates registers based on
                 the element requirement of variables and (2) register
                 packing that rearranges elements of registers in order
                 to increase the number of contiguous free elements,
                 thereby keeping more live variables in registers.
                 Experimental results on a cycle-approximate simulator
                 showed that the proposed framework decreased 92\% of
                 register spills in total and made 91.7\% of 14 common
                 shader programs spill free. These results indicate an
                 opportunity for energy management of the space that is
                 used for storing spilled variables, with the framework
                 improving the performance by a geometric mean of 8.3\%,
                 16.3\%, and 29.2\% for general shader processors in
                 which variables are spilled to memory with 5-, 10-, and
                 20-cycle access latencies, respectively. Furthermore,
                 the reduction in the register requirement of programs
                 enabled another 11 programs with high register pressure
                 to be runnable on a lightweight GPU.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2016:ETA,
  author =       "Weichen Liu and Chunhua Xiao",
  title =        "An Efficient Technique of Application Mapping and
                 Scheduling on Real-Time Multiprocessor Systems for
                 Throughput Optimization",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "65:1--65:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950051",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multiprocessor systems are becoming ubiquitous in
                 today's embedded systems design. In this article, we
                 address the problem of mapping an application
                 represented by a Homogeneous Synchronous Dataflow
                 (HSDF) graph onto a real-time multiprocessor platform
                 with the objective of maximizing total throughput. We
                 propose that the optimal solution to the problem is
                 composed of three components: actor-to-processor
                 mapping, retiming, and actor ordering on each
                 processor. The entire problem is systematically modeled
                 into a Boolean Satisfiability (SAT) problem such that
                 the optimal solution can be guaranteed theoretically.
                 In order to explore the vast solution space more
                 efficiently, we develop a specific HSDF theory solver
                 based on the special characteristics of the timed HSDF,
                 and integrate it into the general search framework of
                 the SAT solver. Two alternative integration methods
                 based on branch-and-bound are presented to achieve
                 early branch pruning in the search space; thus, the
                 scalability is greatly improved. Extensive performance
                 evaluation on synthetic examples and a case study on
                 the realistic H.264 Video Decoder show that our
                 approach provides as much as 76.9\% throughput
                 improvement, and is scalable to industry-sized
                 applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "65",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Duraisamy:2016:HPE,
  author =       "Karthi Duraisamy and Hao Lu and Partha Pratim Pande
                 and Ananth Kalyanaraman",
  title =        "High-Performance and Energy-Efficient Network-on-Chip
                 Architectures for Graph Analytics",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "66:1--66:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2961027",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With its applicability spanning numerous data-driven
                 fields, the implementation of graph analytics on
                 multicore platforms is gaining momentum. One of the
                 most important components of a multicore chip is its
                 communication backbone. Due to inherent irregularities
                 in data movements manifested by graph-based
                 applications, it is essential to design efficient
                 on-chip interconnection architectures for multicore
                 chips performing graph analytics. In this article, we
                 present a detailed analysis of the traffic patterns
                 generated by graph-based applications when mapped to
                 multicore chips. Based on this analysis, we explore the
                 design-space for the Network-on-Chip (NoC) architecture
                 to enable an efficient implementation of graph
                 analytics. We principally consider three types of NoC
                 architectures, viz., traditional mesh, small-world, and
                 high-radix networks. We demonstrate that the
                 small-world-network-enabled wireless NoC (WiNoC) is the
                 most suitable platform for executing the considered
                 graph applications. The WiNoC achieves an average of
                 38\% and 18\% full-system Energy Delay Product savings
                 compared to wireline-mesh and high-radix NoCs,
                 respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kriebel:2016:RAA,
  author =       "Florian Kriebel and Semeen Rehman and Arun
                 Subramaniyan and Segnon Jean Bruno Ahandagbe and
                 Muhammad Shafique and J{\"o}rg Henkel",
  title =        "Reliability-Aware Adaptations for Shared Last-Level
                 Caches in Multi-Cores",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "67:1--67:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2961059",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "On account of their large footprint, on-chip
                 last-level caches in multi-core systems are one of the
                 most vulnerable components to soft errors. However,
                 vulnerability to soft errors highly depends on the
                 configuration and parameters of the last-level cache,
                 especially when executing different applications
                 concurrently. In this article we propose a novel
                 reliability-aware reconfigurable last-level cache
                 architecture (R$^2$Cache) and cache vulnerability
                 model for multi-cores. R$^2$Cache supports various
                 reliability-wise efficient cache configurations (i.e.,
                 cache parameter selection and cache partitioning) for
                 different concurrently executing applications. The
                 proposed vulnerability model takes into account the
                 vulnerability of both the data and tag arrays as well
                 as the active cache area for applications in different
                 execution phases. To enable runtime adaptations, we
                 introduce a lightweight online vulnerability predictor
                 that exploits the knowledge of performance metrics like
                 number of L2 misses to accurately estimate the cache
                 vulnerability to soft errors. Based on the predicted
                 vulnerabilities of different concurrently executing
                 applications in the current execution epoch, our
                 runtime reliability manager reconfigures the cache such
                 that, for the next execution epoch, the total
                 vulnerability for all concurrently executing
                 applications is minimized under user-provided tolerable
                 performance/energy overheads. In scenarios where
                 single-bit error correction for cache lines may be
                 afforded, vulnerability-aware reconfigurations can be
                 leveraged to increase the reliability of the last-level
                 cache against multi-bit errors. Compared to
                 state-of-the-art vulnerability-minimizing and
                 reconfigurable caches, the proposed architecture
                 provides 35.27\% and 23.42\% vulnerability savings,
                 respectively, when averaged across numerous
                 experiments, while reducing the vulnerability by more
                 than 65\% and 60\%, respectively, for selected
                 applications and application phases.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Spasic:2016:IHR,
  author =       "Jelena Spasic and Di Liu and Emanuele Cannella and
                 Todor Stefanov",
  title =        "On the Improved Hard Real-Time Scheduling of
                 Cyclo-Static Dataflow",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "68:1--68:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2932188",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recently, it has been shown that the hard real-time
                 scheduling theory can be applied to streaming
                 applications modeled as acyclic Cyclo-Static Dataflow
                 (CSDF) graphs. However, this recent approach is not
                 always efficient in terms of throughput and processor
                 utilization. Therefore, in this article, we propose an
                 improved hard real-time scheduling approach to schedule
                 streaming applications modeled as acyclic CSDF graphs
                 on a Multiprocessor System-on-Chip (MPSoC) platform.
                 The proposed approach converts each actor in a CSDF
                 graph to a set of real-time periodic tasks. The
                 conversion enables application of many hard real-time
                 scheduling algorithms that offer fast calculation of
                 the required number of processors for scheduling the
                 tasks. In addition, we propose a method to reduce the
                 graph latency when the converted tasks are scheduled as
                 real-time periodic tasks. We evaluate the performance
                 and time complexity of our approach in comparison to
                 several existing scheduling approaches. Experiments on
                 a set of real-life streaming applications demonstrate
                 that our approach (1) results in systems with higher
                 throughput and better processor utilization in
                 comparison to the existing hard real-time scheduling
                 approach for CSDF graphs, while requiring comparable
                 time for the system derivation; (2) delivers shorter
                 application latency by applying the proposed method for
                 graph latency reduction while providing better
                 throughput and processor utilization when compared to
                 the existing hard real-time scheduling approach; (3)
                 gives the same throughput as the existing periodic
                 scheduling approach for CSDF graphs, but requires much
                 shorter time to derive the task schedule and tasks'
                 parameters (periods, start times, and so on); and (4)
                 gives the throughput that is equal to or very close to
                 the maximum achievable throughput of an application
                 obtained via self-timed scheduling, but requires much
                 shorter time to derive the schedule. The total time
                 needed for the proposed conversion approach and the
                 calculation of the minimum number of processors needed
                 to schedule the tasks and the calculation of the size
                 of communication buffers between tasks is in the range
                 of seconds.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Allamigeon:2016:SAM,
  author =       "Xavier Allamigeon and St{\'e}phane Gaubert and Nikolas
                 Stott and {\'E}ric Goubault and Sylvie Putot",
  title =        "A Scalable Algebraic Method to Infer Quadratic
                 Invariants of Switched Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "69:1--69:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2932187",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present a new numerical abstract domain based on
                 ellipsoids designed for the formal verification of
                 switched linear systems. Unlike the existing
                 approaches, this domain does not rely on a user-given
                 template. We overcome the difficulty that ellipsoids do
                 not have a lattice structure by exhibiting a canonical
                 operator over-approximating the union. This operator is
                 the only one that permits the performance of analyses
                 that are invariant with respect to a linear
                 transformation of state variables. It provides the
                 minimum volume ellipsoid enclosing two given
                 ellipsoids. We show that it can be computed in
                 $O(n^3)$ elementary algebraic operations. We finally
                 develop a fast nonlinear power-type algorithm, which
                 allows one to determine sound quadratic invariants on
                 switched systems in a tractable way, by solving
                 fixed-point problems over the space of ellipsoids. We
                 test our approach on several benchmarks, and compare it
                 with the standard techniques based on linear matrix
                 inequalities, showing an important speedup on typical
                 instances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2016:SAR,
  author =       "Xueguang Wu and Liqian Chen and Antoine Min{\'e} and
                 Wei Dong and Ji Wang",
  title =        "Static Analysis of Runtime Errors in Interrupt-Driven
                 Programs via Sequentialization",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "70:1--70:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2914789",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded software often involves intensive numerical
                 computations and suffers from a number of runtime
                 errors. The technique of numerical static analysis is
                 of practical importance for checking the correctness of
                 embedded software. However, most of the existing
                 approaches of numerical static analysis consider
                 sequential programs, while interrupts are a commonly
                 used facility that introduces concurrency in embedded
                 systems. Therefore, a numerical static analysis
                 approach is highly desired for embedded software with
                 interrupts. In this article, we propose a static
                 analysis approach specifically for interrupt-driven
                 programs based on sequentialization techniques. We
                 present a method to sequentialize interrupt-driven
                 programs into nondeterministic sequential programs
                 according to the semantics of interrupts. The key
                 benefit of using sequentialization is the ability to
                 leverage the power of state-of-the-art analysis and
                 verification techniques for sequential programs to
                 analyze interrupt-driven programs, for example, the
                 power of numerical abstract interpretation to analyze
                 numerical properties of the sequentialized programs.
                 Furthermore, to improve the analysis precision and
                 scalability, we design specific abstract domains to
                 analyze sequentialized interrupt-driven programs by
                 considering their specific features. Finally, we
                 present encouraging experimental results obtained by
                 our prototype implementation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Baudart:2016:LTT,
  author =       "Guillaume Baudart and Albert Benveniste and Timothy
                 Bourke",
  title =        "Loosely Time-Triggered Architectures: Improvements and
                 Comparisons",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "71:1--71:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2932189",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Loosely Time-Triggered Architectures (LTTAs) are a
                 proposal for constructing distributed embedded control
                 systems. They build on the quasi-periodic architecture,
                 where computing units execute nearly periodically, by
                 adding a thin layer of middleware that facilitates the
                 implementation of synchronous applications. In this
                 article, we show how the deployment of a synchronous
                 application on a quasi-periodic architecture can be
                 modeled using a synchronous formalism. Then we detail
                 two protocols, Back-Pressure LTTA, reminiscent of
                 elastic circuits, and Time-Based LTTA, based on
                 waiting. Compared to previous work, we present
                 controller models that can be compiled for execution, a
                 simplified version of the Time-Based protocol and
                 optimizations for systems using broadcast
                 communication. We also compare the LTTA approach with
                 architectures based on clock synchronization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shen:2016:UAS,
  author =       "Jie Shen and Yingjue Cai and Yang Ren and Xiao Yang",
  title =        "A Universal Application Storage System Based on Smart
                 Card",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "72:1--72:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2886116",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Nowadays, electronic commerce (e-commerce) has brought
                 facilitation to people's daily lives. Smart-card-based
                 systems are widely used as an implementation, where
                 smart cards act as a secure carrier for small-sized
                 data. However, most of these systems are developed and
                 managed by each service provider individually and
                 repeatedly, which causes both unnecessary work and
                 difficulties in future maintenance. Besides, advantages
                 of smart card technology are not full-fledged for the
                 lack of enough consideration in flexibility and
                 security. To propose a solution, this article presents
                 a Universal Application Storage System, including card
                 side, terminal side, and back-end system. The card side
                 provides a universal and secured infrastructure for
                 data storage, where data are organized and stored in a
                 card file system with several security mechanisms. In
                 the terminal side, a framework for accessing various
                 forms of secure element is presented to simplify the
                 procedures involved in manipulating smart cards.
                 Through this framework, the back-end system is able to
                 establish a direct connection to the card, and performs
                 authorized operations by exchanging commands in a
                 secure channel. The validity of the proposed system is
                 verified at the end of this article, illustrated by an
                 e-coupon system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hassan:2016:HSB,
  author =       "Hadeer A. Hassan and Sameh A. Salem and Ahmed M.
                 Mostafa and E. M. Saad",
  title =        "Harmonic Segment-Based Semi-Partitioning Scheduling on
                 Multi-Core Real-Time Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "73:1--73:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2933388",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Nowadays, the issue of scheduling multi-core real-time
                 systems has become the focus of such research in
                 industrial, biomedical, military, and other fields. As
                 a consequence, a new semi-partitioning algorithm that
                 uses a static Rate-Monotonic criterion to schedule
                 real-time tasks on multi-core platforms is proposed.
                 The improvement in the performance of real-time systems
                 is achieved by exploiting the fact that the utilization
                 boundary of a task set increases to fully utilize the
                 processors if the periods of tasks have harmonic nature
                 among each other. Experimental results on randomly
                 generated datasets and real-world datasets show that
                 the proposed algorithm inevitably outperforms other
                 competitive algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "73",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2016:JJO,
  author =       "Chin-Hsien Wu and Syuan-An Chen",
  title =        "{JOM}: a Joint Operation Mechanism for {NAND} Flash
                 Memory",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "74:1--74:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2915916",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In the storage systems of NAND flash memory, an
                 intermediate software called a Flash Translation Layer
                 (FTL) is adopted to hide the characteristics of NAND
                 flash memory and provide efficient management for NAND
                 flash memory. Current flash translation layers can be
                 classified into a page-mapping FTL, a block-mapping
                 FTL, and a hybrid-mapping FTL. In order to utilize the
                 advantages of the page-mapping FTL and the
                 block-mapping FTL, the hybrid-mapping FTL is proposed
                 to store data to the appropriate mapping mechanism by
                 switching the mapping information between the
                 page-mapping mechanism and the block-mapping mechanism.
                 In the article, we propose a joint operation mechanism
                 to rethink the advantages of the page-mapping FTL, the
                 block-mapping FTL, and the hybrid-mapping FTL. With the
                 joint operation mechanism, a flash translation layer
                 can consider the main memory requirements, improve the
                 system performance, and reduce the garbage collection
                 overhead. The experimental results show that the
                 proposed joint operation mechanism can achieve the goal
                 under realistic workloads and benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "74",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chiew:2016:NEI,
  author =       "Wei Ming Chiew and Feng Lin and Hock Soon Seah",
  title =        "A Novel Embedded Interpolation Algorithm with Negative
                 Squared Distance for Real-Time Endomicroscopy",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "75:1--75:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2905367",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Interpolation is the most executed operation and one
                 of the main bottlenecks in embedded imaging,
                 registration, and rendering systems. Existing methods
                 either lack parallelization and scalability
                 capabilities or are too computationally complex to
                 execute efficiently. Acknowledging that improving
                 execution time leads to degradation in image quality,
                 we formulate a novel Negative Squared Distance (NSD)
                 interpolation method that exhibits excellent
                 performance by exploiting Look-Up Table (LUT)
                 optimization for Field Programmable Gate Array (FPGA)
                 speedup, with a balanced trade-off in quality in our
                 embedded endomicroscopic imaging system. Quantitative
                 analysis on performance and resource utilization of NSD
                 against existing methods is reported through an
                 implementation on a Xilinx ML605 platform. Functional
                 validation using practical image resizing and rotation
                 applications to compare qualitative performance against
                 existing algorithms is performed and presented with
                 visual and numerical results. Our method is shown to
                 have a smaller design size and produces a maximum
                 throughput of over twofold against trilinear
                 interpolation with on-par image quality as the baseline
                 method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "75",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lin:2016:CFQ,
  author =       "Chun-Han Lin and Chih-Kai Kang and Pi-Cheng Hsiu",
  title =        "{CURA}: a Framework for Quality-Retaining Power Saving
                 on Mobile {OLED} Displays",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "76:1--76:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2909875",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Organic Light-Emitting Diode (OLED) technology is
                 regarded as a promising alternative to mobile displays.
                 In this article, we introduce the design, algorithm,
                 and implementation of a novel framework called CURA for
                 quality-retaining power saving on mobile OLED displays.
                 First, we link human visual attention to OLED power
                 saving and model the OLED image scaling optimization
                 problem. The objective is to minimize the power
                 required to display an image without adversely
                 impacting the user's visual experience. Then, we
                 present the algorithm used to solve the modeled
                 problem, and prove its optimality even without an
                 accurate power model. Finally, based on the framework,
                 we implement two practical applications on a commercial
                 OLED mobile tablet. The results of experiments
                 conducted on the tablet with real images demonstrate
                 that CURA can reduce significant OLED power consumption
                 while retaining the visual quality of images.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "76",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hester:2016:PCB,
  author =       "Josiah Hester and Nicole Tobias and Amir Rahmati and
                 Lanny Sitanayah and Daniel Holcomb and Kevin Fu and
                 Wayne P. Burleson and Jacob Sorber",
  title =        "Persistent Clocks for Batteryless Sensing Devices",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "77:1--77:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2903140",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Sensing platforms are becoming batteryless to enable
                 the vision of the Internet of Things, where trillions
                 of devices collect data, interact with each other, and
                 interact with people. However, these batteryless
                 sensing platforms --- that rely purely on energy
                 harvesting --- are rarely able to maintain a sense of time
                 after a power failure. This makes working with sensor
                 data that is time sensitive especially difficult. We
                 propose two novel, zero-power timekeepers that use
                 remanence decay to measure the time elapsed between
                 power failures. Our approaches compute the elapsed time
                 from the amount of decay of a capacitive device, either
                 on-chip Static Random-Access Memory (SRAM) or a
                 dedicated capacitor. This enables hourglass-like timers
                 that give intermittently powered sensing devices a
                 persistent sense of time. Our evaluation shows that
                 applications using either timekeeper can keep time
                 accurately through power failures as long as 45s with
                 low overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "77",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Xi:2016:FSS,
  author =       "Kai Xi and Jiankun Hu and B. V. K. Vijaya Kumar",
  title =        "{FE-SViT}: a {SViT}-Based Fuzzy Extractor Framework",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "78:1--78:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2930669",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As a promising bio-cryptographic technique, the fuzzy
                 extractor seamlessly binds biometrics and cryptography
                 for template protection and key generation. However,
                 most existing methods hardly solve the following issues
                 simultaneously: (1) Fingerprint registration, (2)
                 Verification accuracy, (3) Security strength, and (4)
                 Computational efficiency. In this article, we introduce
                 a bio-crypto-oriented fingerprint verification scheme
                 --- Selective Vertex-indexed Triangulation (SViT) which
                 maps minutia global topology to local triangulation
                 with minimum information loss. Then, a SViT-based fuzzy
                 extractor framework (FE-SViT) is proposed and high
                 verification accuracy is achieved. The FE-SViT is
                 highly parallelizable and efficient which makes it
                 suitable for embedded devices.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Olivier:2016:MEP,
  author =       "Pierre Olivier and Jalil Boukhobza and Eric Senn and
                 Hamza Ouarnoughi",
  title =        "A Methodology for Estimating Performance and Power
                 Consumption of Embedded Flash File Systems",
  journal =      j-TECS,
  volume =       "15",
  number =       "4",
  pages =        "79:1--79:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2903139",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 1 16:03:45 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In the embedded systems domain, obtaining performance
                 and power consumption estimations is extremely valuable
                 in numerous cases. This is particularly true during the
                 design stage, as designers of complex embedded systems
                 face an increasingly large design space. Secondary
                 storage is a well-known performance bottleneck and has
                 also been reported as an important factor of power
                 consumption. Flash memory is the main secondary storage
                 media in an embedded system and exhibits specific
                 constraints in its usage. One popular way to manage
                 these constraints is to use dedicated Flash File
                 Systems (FFS). In this article, we propose a
                 methodology to estimate the performance and power
                 consumption of applicative I/Os on an FFS-based storage
                 system within embedded Linux. The methodology is
                 divided into three sequential steps. In the exploration
                 phase, the main factors of an FFS storage system
                 impacting performance and power consumption are
                 identified. In the modeling phase, this impact is
                 formalized into models. Finally, in the last phase, the
                 models are implemented in a simulator named OpenFlash.
                 OpenFlash allows obtaining performance and power
                 consumption estimations for an applicative workload
                 processed by the Linux FFS storage stack on an embedded
                 platform. The simulator is validated against real
                 measurements and the estimation error stays below
                 10\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "79",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2016:EDP,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Distributed Public Ledgers and Block Chains
                 --- What Good Are They for Embedded Systems?",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "1:1--1:2",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3001902",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Castrillon:2016:GES,
  author =       "Jeronimo Castrillon and Cristina Silvano",
  title =        "Guest Editorial: Special Issue on {Virtual Prototyping
                 of Parallel and Embedded Systems (ViPES)}",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "2:1--2:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2991466",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Bortolotti:2016:VRT,
  author =       "Daniele Bortolotti and Andrea Marongiu and Luca
                 Benini",
  title =        "{VirtualSoC}: a Research Tool for Modern {MPSoCs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "3:1--3:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2930665",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Architectural heterogeneity has proven to be an
                 effective design paradigm to cope with an
                 ever-increasing demand for computational power within
                 tight energy budgets, in virtually every computing
                 domain. Programmable manycore accelerators are
                 currently widely used not only in high-performance
                 computing systems, but also in embedded devices, in
                 which they operate as coprocessors under the control of
                 a general-purpose CPU (the host processor). Clearly,
                 such powerful hardware architectures are paired with
                 sophisticated and complex software ecosystems, composed
                 of operating systems, programming models plus
                 associated runtime engines, and increasingly complex
                 user applications with related libraries. System
                 modeling has always played a key role in early
                 architectural exploration or software development when
                 the real hardware is not available. The necessity of
                 efficiently coping with the huge HW/SW design space
                 provided by the described heterogeneous Systems on Chip
                 (SoCs) calls for advanced full-system simulation
                 methodologies and tools, capable of assessing various
                 metrics for the functional and nonfunctional properties
                 of the target system. In this article, we describe
                 VirtualSoC, a simulation tool targeting the full-system
                 simulation of massively parallel heterogeneous SoCs. We
                 also describe how VirtualSoC has been successfully
                 adopted in several research projects.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Wehner:2016:SRM,
  author =       "Philipp Wehner and Jens Rettkowski and Tobias Kalb and
                 Diana G{\"o}hringer",
  title =        "Simulating Reconfigurable Multiprocessor
                 Systems-on-Chip with {MPSoCSim}",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "4:1--4:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2972952",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Upcoming reconfigurable Multiprocessor Systems-on-Chip
                 (MPSoCs) present new challenges for the design and
                 early estimation of technology requirements due to
                 their runtime adaptive hardware architecture. The usage
                 of simulators offers capabilities to overcome these
                 issues. In this article, MPSoCSim, a SystemC simulator
                 for Network-on-Chip (NoC) based MPSoCs is extended to
                 support the simulation of reconfigurable MPSoCs.
                 Processors, such as ARM and MicroBlaze, and peripheral
                 models used within the virtual platform are provided by
                 Imperas/OVP and attached to the NoC. Moreover, traffic
                 generators are available to analyze the system. The
                 virtual platform currently supports mesh topology with
                 wormhole switching and several routing algorithms such
                 as XY-, a minimal West-First algorithm, and an adaptive
                 West-First algorithm. Amongst the impact of routing
                 algorithms regarding performance, reconfiguration
                 processes can be examined using the presented
                 simulator. A mechanism for dynamic partial
                 reconfiguration is implemented that is oriented towards
                 the reconfiguration scheme on real FPGA platforms. It
                 includes the simulation of the undefined behavior of
                 the hardware region during reconfiguration and allows
                 the adjustment of parameters. During runtime, dynamic
                 partial reconfiguration interfaces are used to connect
                 the Network-on-Chip infrastructure with reconfigurable
                 regions. The configuration access ports can be modeled
                 by the controller for the dynamic partial
                 reconfiguration in form of an application programming
                 interface. An additional SystemC component enables the
                 readout of simulation time from within the application.
                 For evaluation of the simulator, timing and power
                 consumption of the simulated hardware are estimated and
                 compared with a real hardware implementation on a
                 Xilinx Zynq FPGA. The comparison shows that the
                 simulator improves the development of reconfigurable
                 MPSoCs by early estimation of system requirements. The
                 power estimations show a maximum deviation of 9mW at
                 1.9W total power consumption.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Sauer:2016:LFD,
  author =       "Christian Sauer and Hans-Peter Loeb",
  title =        "A Lightweight Framework for the Dynamic Creation and
                 Configuration of Virtual Platforms in {SystemC}",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "5:1--5:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2983626",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Virtual prototypes leverage SystemC/TLM for simulating
                 programmable platforms comprising hundreds of modules.
                 Their efficient creation and configuration is vital for
                 acceptable turnaround times, for example, during
                 performance exploration or software development.
                 Therefore, our lightweight framework provides a factory
                 that creates designs from abstract descriptions of
                 module instances, properties, and connections. Modules
                 mark properties as creation or runtime parameters. The
                 resulting generic design descriptions are usable by
                 non-experts and enable front-ends. The infrastructure
                 is a small C++ library with only 1,350 lines of code
                 that can be combined with existing SystemC/TLM models
                 and simulation kernels. An industrial case study of a
                 complex multiprocessor SoC shows a distinct
                 productivity gain.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Meyer:2016:SSC,
  author =       "Rolf Meyer and Jan Wagner and Bastian Farkas and Sven
                 Horsinka and Patrick Siegl and Rainer Buchty and Mladen
                 Berekovic",
  title =        "A Scriptable Standard-Compliant Reporting and Logging
                 Framework for {SystemC}",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "6:1--6:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2983623",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the ever-increasing complexity of digital
                 designs, debugging and evaluation face likewise
                 increasing challenges. While recent advances in
                 hardware/software co-simulation have been made,
                 solutions for corresponding debugging and evaluation
                 did not mature and improve in a similar fashion. In
                 this article, we present a dedicated solution to ease
                 the debugging and evaluation efforts, particularly
                 focusing on full-system simulation. Improving
                 significantly over existing solutions, the presented
                 approach features a standards-compliant powerful and
                 flexible method of deriving, logging, and filtering
                 detailed status information from SystemC-based models.
                 At the core of this approach are flexible scripting
                 capabilities that may change all logging parameters
                 during runtime, thus not requiring re-compiling the
                 to-be-simulated model, as in many competing solutions.
                 The approach is tested and benchmarked with a
                 real-world full-system example, demonstrating the
                 overall benefits. The presented solution is published
                 as open source via github (see text) and, by strictly
                 adhering to existing standards, is generally compatible
                 with existing SystemC simulation environments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Murillo:2016:MSD,
  author =       "Luis Gabriel Murillo and R{\'o}bert Lajos B{\"u}cs and
                 Rainer Leupers and Gerd Ascheid",
  title =        "{MPSoC} Software Debugging on Virtual Platforms via
                 Execution Control with Event Graphs",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "7:1--7:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950052",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Virtual Platforms (VPs) are advantageous to develop
                 and debug complex software for multi- and
                 many-processor systems-on-chip (MPSoCs). VPs provide
                 unrivaled controllability and visibility of the target,
                 which can be exploited to examine bugs that cannot be
                 reproduced easily in real hardware (e.g., bugs
                 originating from races or happening during a processor
                 stand-by state). However, VPs as employed in practice
                 for debugging are generally underutilized. The
                 accompanying debug ecosystem is based mostly on
                 traditional tools, such as step-based debuggers and
                 traces, that fall short to address the enormous
                 complexity of modern MPSoCs and their parallel
                 software. Finding a bug is still largely left to the
                 developer's experience and intuition, using manual
                 means rather than automated or systematic solutions
                 that exploit the controllability and visibility of VPs.
                 Profiting from VPs for MPSoC software debugging is an
                 open question. To bridge this gap, this article
                 presents a novel framework for debug visualization and
                 execution control that, relying on the many benefits of
                 VPs, helps to identify and test possible
                 concurrency-related bug scenarios. The framework allows
                 examining and steering the target system by
                 manipulating an abstract graph that highlights relevant
                 inter-component interactions and dependencies. The
                 proposed framework reduces the effort required to
                 understand complex concurrency patterns and helps to
                 expose bugs. Its efficacy is demonstrated on (i) a
                 shared memory symmetric multi-processing platform
                 executing Linux and parallel benchmarks, and (ii) a
                 distributed automotive system for driver assistance
                 applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Sotiriou-Xanthopoulos:2016:FIA,
  author =       "Efstathios Sotiriou-Xanthopoulos and Sotirios Xydis
                 and Kostas Siozios and George Economakos and Dimitrios
                 Soudris",
  title =        "A Framework for Interconnection-Aware Domain-Specific
                 Many-Accelerator Synthesis",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "8:1--8:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2983624",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many-accelerator Systems-on-Chip (SoC) have recently
                 emerged as a promising platform paradigm that combines
                 parallelization with heterogeneity, in order to cover
                 the increasing demands for high performance and energy
                 efficiency. To exploit the full potential of
                 many-accelerator systems, automated design verification
                 and analysis frameworks are required, targeted to both
                 computational and interconnection optimization.
                 Accurate simulation of interconnection schemes should
                 use real stimuli, which are produced from fully
                 functional nodes, requiring the prototyping of the
                 processing elements and memories of the
                 many-accelerator system. In this article, we argue that
                 the Hierarchical Network-on-Chip (HNoC) scheme forms a
                 very promising solution for many-accelerator systems in
                 terms of scalability and data-congestion minimization.
                 We present a parameterizable SystemC prototyping
                 framework for HNoCs, targeted to domain-specific
                 many-accelerator systems. The framework supports the
                 prototyping of processing elements, memory modules, and
                 underlying interconnection infrastructure, while it
                 provides an API for their easy integration to the HNoC.
                 Finally, it enables holistic system simulation using
                 real node data. Using as a case study a
                 many-accelerator system of an MRI pipeline, an analysis
                 on the proposed framework is presented to demonstrate
                 the impact of the system parameters on the system.
                 Through extensive experimental analysis, we show the
                 superiority of HNoC schemes in comparison to typical
                 interconnection architectures. Finally, we show that,
                 adopting the proposed many-accelerator design flow,
                 significant performance improvements are achieved, from
                 $ 1.2 \times $ up to $ 26 \times $, as compared to a
                 x86 software implementation of the MRI pipeline.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Zhu:2016:GES,
  author =       "Dakai Zhu and Meikang Qiu and Samarjit Chakraborty",
  title =        "Guest Editorial: Special Issue on Emerging
                 Technologies in Embedded Software and Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "9:1--9:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2991464",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Jayakumar:2016:SMV,
  author =       "Hrishikesh Jayakumar and Arnab Raha and Vijay
                 Raghunathan",
  title =        "Sleep-Mode Voltage Scaling: Enabling {SRAM} Data
                 Retention at Ultra-Low Power in Embedded
                 Microcontrollers",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "10:1--10:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950054",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In heavily duty-cycled embedded systems, the energy
                 consumed by the microcontroller in idle mode is often
                 the bottleneck for battery lifetime. Existing solutions
                 address this problem by placing the microcontroller in
                 a low-power (sleep) mode when idle and preserving
                 application state either by retaining the data in situ
                 in Static Random Access Memory (SRAM) or by
                 checkpointing it to Flash. However, both of these
                 approaches have notable drawbacks. In situ data
                 retention requires the SRAM to remain powered in sleep
                 mode, while checkpointing to Flash involves significant
                 energy and time overheads. This article proposes a new
                 ultra-low-power sleep mode for microcontrollers that
                 overcomes the limitations of both of these approaches.
                 Our technique, Hypnos, is based on the key observation
                 that the on-chip SRAM in a microcontroller exhibits
                 100\% data retention even at a much lower supply
                 voltage (as much as $ 10 \times $ lower) than the
                 typical operating voltage of the microcontroller.
                 Hypnos exploits this observation by performing extreme
                 voltage scaling when the microcontroller is in sleep
                 mode. We implement and evaluate Hypnos for the TI
                 MSP430G2452 microcontroller and show that the
                 Microcontroller (MCU) draws only 26nA in the proposed
                 sleep mode, which is $ 4 \times $ lower than a baseline
                 sleep mode that preserves SRAM contents. Further, to
                 reduce the overheads associated with performing the
                 voltage scaling, we propose the use of an energy
                 harvesting source for providing the scaled supply
                 voltage and demonstrate (using a light sensing
                 photodiode) that the current consumption in the
                 proposed sleep mode can be reduced to 1nA, which is $
                 100 \times $ lower than the current consumption in the
                 baseline low-power mode. We also show that the decrease
                 in sleep-mode power consumption translates to a
                 reduction in application-level energy consumption by as
                 much as $ 6.45 \times $. By decreasing the average
                 power consumption to such minuscule levels, Hypnos
                 takes a significant step forward in making perpetual
                 systems a reality through the use of energy
                 harvesting.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Marz:2016:RPC,
  author =       "Stephen Marz and Brad {Vander Zanden}",
  title =        "Reducing Power Consumption and Latency in Mobile
                 Devices Using an Event Stream Model",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "11:1--11:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2964203",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most consumer-based mobile devices use asynchronous
                 events to awaken apps. Currently, event handling is
                 implemented in either an application or an application
                 framework such as Java's virtual machine (VM) or
                 Microsoft's {.NET}, and it uses a ``polling loop'' that
                 periodically queries an event queue to determine if an
                 event has occurred. These loops must awaken the
                 process, check for an event, and then put the process
                 back to sleep many times per second. This constant
                 arousal prevents the CPU from being put into a deep
                 sleep state, which increases power consumption.
                 Additionally, the process cannot check for events while
                 it sleeps, and this delay in handling events increases
                 latency, which is the time that elapses between when an
                 event occurs and when the application responds to the
                 event. We call this model of event handling a ``pull''
                 model because it needs to query hardware devices or
                 software queues in order to ``pull'' events from them.
                 Recent advances in input devices support direct,
                 informative interrupts to the kernel when an event
                 occurs. This allows us to develop a much more efficient
                 event-handling model called the ``Event Stream Model''
                 (ESM). This model is a push model that allows a process
                 to sleep as long as no event occurs but then
                 immediately awakens a process when an event occurs.
                 This model eliminates the polling loop, thus
                 eliminating latency-inducing sleep between polls and
                 reducing unnecessary power consumption. To work
                 properly, the ESM model must be implemented in the
                 kernel rather than in the application. In this article,
                 we describe how we implemented the ESM model in Android
                 operating system (OS). Our results show that with the
                 event stream model, power consumption is reduced by up
                 to 23.8\% in certain circumstances, and latency is
                 reduced by an average of 13.6ms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Chen:2016:ICA,
  author =       "Renhai Chen and Yi Wang and Jingtong Hu and Duo Liu
                 and Zili Shao and Yong Guan",
  title =        "Image-Content-Aware {I/O} Optimization for Mobile
                 Virtualization",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "12:1--12:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950059",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Mobile virtualization introduces extra layers in
                 software stacks, which leads to performance
                 degradation. Notably, each I/O operation has to pass
                 through several software layers to reach the
                 NAND-flash-based storage systems. This article targets
                 at optimizing I/O for mobile virtualization, since I/O
                 becomes one of major performance bottlenecks that
                 seriously affects the performance of mobile devices.
                 Among all the I/O operations, a large percentage is to
                 update metadata. Frequently updated metadata not only
                 degrade overall I/O performance but also severely
                 reduce flash memory lifetime. In this article, we
                 propose a novel I/O optimization technique to identify
                 the metadata of a guest file system that is stored in a
                 virtual machine image file and frequently updated.
                 Then, these metadata are stored in a small additional
                 non-volatile memory (NVM), which is faster and more
                 endurable to greatly improve flash memory's performance
                 and lifetime. To the best of our knowledge, this is the
                 first work to identify the file system metadata from
                 regular data in a guest OS image file with NVM
                 optimization. The proposed scheme is evaluated on a
                 real hardware embedded platform. The experimental
                 results show that the proposed techniques can improve
                 write performance to 45.21\% in mobile devices with
                 virtualization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Gu:2016:CPP,
  author =       "Zonghua Gu and Chao Wang and Haibo Zeng",
  title =        "Cache-Partitioned Preemption Threshold Scheduling",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "13:1--13:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950057",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "For preemptive scheduling with shared cache, different
                 tasks may cause interference in the shared cache,
                 leading to Cache-Related Preemption Overhead (CRPD).
                 Cache partitioning can be used to reduce or eliminate
                 CRPD. We propose integration of cache partitioning and
                 Preemption Threshold Scheduling to optimize
                 schedulability for both Fixed-Priority and Earliest
                 Deadline First scheduling algorithms on a uniprocessor.
                 We let each subset of tasks assigned the same cache
                 partition be a nonpreemptive group by assigning the
                 same preemption threshold to them, which eliminates
                 CRPD both within each cache partition and between
                 different cache partitions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Hu:2016:AWM,
  author =       "Biao Hu and Kai Huang and Gang Chen and Long Cheng and
                 Alois Knoll",
  title =        "Adaptive Workload Management in Mixed-Criticality
                 Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "14:1--14:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950058",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Due to the efficient resource usage of integrating
                 tasks with different criticality onto a shared
                 platform, the integration with mixed-criticality tasks
                 is becoming an increasingly important trend in the
                 design of real-time systems. One challenge in such a
                 mixed-criticality system is to maximize the service for
                 low-critical tasks, while meeting the timing
                 constraints of high-critical tasks. In this article, we
                 investigate how to adaptively manage the low-critical
                 workload during runtime to meet both goals, that is,
                 providing the service for low-critical tasks as much as
                 possible and guaranteeing the hard real-time
                 requirements for high-critical tasks. Unlike previous
                 methods, which enforce an offline bound towards the
                 low-critical workload, runtime adaptation approaches
                 are proposed in which the incoming workload of
                 low-critical tasks is adaptively regulated by
                 considering the actual demand of high-critical tasks.
                 This actual demand of the high-critical tasks, in turn,
                 is adaptively updated using their historical arrival
                 information. Based on this adaptation scheme, two
                 scheduling policies---the priority-adjustment policy and
                 the workload-shaping policy---are proposed to do the
                 workload management. In order to reduce online
                 management overhead, a lightweight scheme with $ O (n
                 \cdot \log (n)) $ complexity is developed. Extensive
                 simulation results are presented to demonstrate the
                 effectiveness of our proposed workload management
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Tuncali:2016:APM,
  author =       "Cumhur Erkan Tuncali and Georgios Fainekos and
                 Yann-Hang Lee",
  title =        "Automatic Parallelization of Multirate Block Diagrams
                 of Control Systems on Multicore Platforms",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "15:1--15:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950055",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article addresses the problem of parallelizing
                 model block diagrams for real-time embedded
                 applications on multicore architectures. We describe a
                 Mixed Integer Linear Programming formulation for
                 finding a feasible mapping of the blocks to different
                 CPU cores. For single-rate models, we use an objective
                 function that minimizes the overall worst-case
                 execution time. We introduce a set of heuristics to
                 solve the problem for large models in a reasonable
                 time. For multirate models, we solve the feasibility
                 problem for finding a valid mapping. We study the
                 scalability and efficiency of our approach with
                 synthetic benchmarks and an engine controller from
                 Toyota.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Fusella:2016:CAA,
  author =       "Edoardo Fusella and Alessandro Cilardo",
  title =        "Crosstalk-Aware Automated Mapping for Optical
                 Networks-on-Chip",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "16:1--16:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2930666",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Optical networks-on-chip (NoCs) provide a promising
                 answer to address the increasing requirements of
                 ultra-high bandwidth and extremely low power
                 consumption. Designing a photonic interconnect,
                 however, involves a number of challenges that have no
                 equivalent in the electronic domain, particularly the
                 crosstalk noise, which affects the signal-to-noise
                 ratio (SNR) possibly resulting in an inoperable
                 architecture and hence constraining the network
                 scalability. In this article, we point out the
                 implications of application-driven task mapping on
                 crosstalk effects. We motivate the main rationale of
                 our work and provide a formalization of the problem.
                 Then we propose a class of algorithms that
                 automatically map the application tasks onto a generic
                 mesh-based photonic NoC architecture such that the
                 worst-case crosstalk is minimized. We also present a
                 purpose-built experimental setup used for evaluating
                 several architectural solutions in terms of crosstalk
                 noise and SNR. The setup is used to collect extensive
                 results from several real-world applications and case
                 studies. The collected results show that the crosstalk
                 noise can be significantly reduced by adopting our
                 approach, thereby allowing higher network scalability,
                 and can exhibit encouraging improvements over
                 application-oblivious architectures.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Iida:2016:GET,
  author =       "Yuki Iida and Yusuke Fujii and Takuya Azumi and
                 Nobuhiko Nishio and Shinpei Kato",
  title =        "{GPUrpc}: Exploring Transparent Access to Remote
                 {GPUs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "17:1--17:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950056",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Graphics processing units (GPUs) are increasingly used
                 for high-performance computing. Programming frameworks
                 for general-purpose computing on GPUs (GPGPU), such as
                 CUDA and OpenCL, are also maturing. Driving this trend
                 is the recent proliferation of mobile devices such as
                 smartphones and wearable computers. These devices are
                 increasingly incorporating computationally intensive
                 applications that involve some form of environmental
                 recognition such as augmented reality (AR) or voice
                 recognition. However, devices with low computational
                 power cannot satisfy such demanding computing
                 requirements. The CPU load of these devices could be
                 reduced by offloading computation onto GPUs on the
                 cloud. This paper presents GPUrpc, a remote procedure
                 call (RPC) extension to Gdev, which is a rich set of
                 runtime libraries and device drivers for achieving
                 first-class GPU resource management. GPUrpc allows
                 developers to use CUDA for GPGPU development work.
                 Existing research uses RPCs based on the CUDA
                 application programming interfaces (APIs); hence, all
                 CUDA APIs require communication. To reduce
                 communication overhead, we use an RPC based on a
                 low-level API than CUDA API and reduced API that does
                 not require communication. Our evaluation conducted on
                 Linux and NVIDIA GPUs shows that the basic performance
                 of our prototype implementation is reliable in
                 comparison with the existing method. Evaluation using
                 the Rodinia benchmark suite designed for research in
                 heterogeneous parallel computing showed that GPUrpc is
                 effective for applications such as image processing and
                 data mining. GPUrpc also can improve power consumption
                 to approximately 1/6 that of CPU processing for
                 performing $ 512 \times 512 $ matrix multiplication.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Wang:2016:GTB,
  author =       "Kun Wang and Miao Du and Dejun Yang and Chunsheng Zhu
                 and Jian Shen and Yan Zhang",
  title =        "Game-Theory-Based Active Defense for Intrusion
                 Detection in Cyber-Physical Embedded Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "18:1--18:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2886100",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cyber-Physical Embedded Systems (CPESs) are
                 distributed embedded systems integrated with various
                 actuators and sensors. When it comes to the issue of
                 CPES security, the most significant problem is the
                 security of Embedded Sensor Networks (ESNs). With the
                 continuous growth of ESNs, the security of transferring
                 data from sensors to their destinations has become an
                 important research area. Due to the limitations in
                 power, storage, and processing capabilities, existing
                 security mechanisms for wired or wireless networks
                 cannot apply directly to ESNs. Meanwhile, ESNs are
                 likely to be attacked by different kinds of attacks in
                 industrial scenarios. Therefore, there is a need to
                 develop new techniques or modify the current security
                 mechanisms to overcome these problems. In this article,
                 we focus on Intrusion Detection (ID) techniques and
                 propose a new attack-defense game model to detect
                 malicious nodes using a repeated game approach. As a
                 direct consequence of the game model, attackers and
                 defenders make different strategies to achieve optimal
                 payoffs. Importantly, error detection and missing
                 detection are taken into consideration in Intrusion
                 Detection Systems (IDSs), where a game tree model is
                 introduced to solve this problem. In addition, we
                 analyze and prove the existence of pure Nash
                 equilibrium and mixed Nash equilibrium. Simulations
                 show that the proposed model can both reduce energy
                 consumption by up to 50\% compared with the existing
                 All Monitor (AM) model and improve the detection rate
                 by up to 10\% to 15\% compared with the existing
                 Cluster Head (CH) monitor model.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Tan:2016:SSH,
  author =       "Song Tan and Wen-Zhan Song and Steve Yothment and
                 Junjie Yang and Lang Tong",
  title =        "{ScorePlus}: a Software-Hardware Hybrid and Federated
                 Experiment Environment for Smart Grid",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "19:1--19:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2964200",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present ScorePlus, a software-hardware hybrid and
                 federated experiment environment for Smart Grid.
                 ScorePlus incorporates both a software emulator and
                 hardware testbed, such that they all follow the same
                 architecture, and the same Smart Grid application
                 program can be tested on either of them without any
                 modification; ScorePlus provides a federated
                 environment such that multiple software emulators and
                 hardware testbeds at different locations are able to
                 connect and form a unified Smart Grid system; ScorePlus
                 software is encapsulated as a resource plugin in the
                 OpenStack cloud computing platform, such that it
                 supports massive deployments with large-scale test
                 cases in cloud infrastructure.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Majmudar:2016:AOR,
  author =       "Charvi A. Majmudar and Bashir I. Morshed",
  title =        "Autonomous {OA} Removal in Real-Time from Single
                 Channel {EEG} Data on a Wearable Device Using a Hybrid
                 Algebraic-Wavelet Algorithm",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "20:1--20:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2983629",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Electroencephalography (EEG) is a non-invasive
                 technique to record brain activities in natural
                 settings. Ocular Artifacts (OA) usually contaminates
                 EEG signals, removal of which is critical for accurate
                 feature extraction and classification. With the
                 increasing adoption of wearable technologies,
                 single-channel real-time EEG systems that often require
                 real-time signal processing for immediate real-time
                 feedback are becoming more prevalent. However,
                 traditional OA removal algorithms usually require
                 multiple channels of EEG data, are computationally
                 expensive, and do not perform well in real-time. In
                 this article, a new hybrid algorithm is proposed that
                 autonomously detects OA and subsequently removes OA
                 from a single-channel streaming EEG data in real-time.
                 The proposed single EEG channel algorithm also does not
                 require additional reference electrooculography (EOG)
                 channel. The algorithm has also been implemented on an
                 embedded hardware platform of single channel wearable
                 EEG system (NeuroMonitor). The algorithm first detects
                 the OA zones using an Algebraic approach and then
                 removes these artifacts from the detected OA zones
                 using the Discrete Wavelet Transform (DWT)
                 decomposition method. The de-noising technique is
                 applied only to the OA zone, which minimizes loss of
                 neural information outside the OA zone. A qualitative
                 and quantitative performance evaluation was carried out
                 with a 0.5s epoch in overlapping sliding window
                 technique using time-frequency analysis, mean square
                 coherence, and correlation coefficient statistics. The
                 hybrid OA removal algorithm demonstrated real-time
                 operation with 3s latency on the
                 PSoC-3-microcontroller-based EEG system. Successful
                 implementation of OA removal from single-channel
                 real-time EEG data using the proposed algorithm shows
                 promise for real-time feedback applications of wearable
                 EEG devices.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Kuan:2016:SEI,
  author =       "Yuan-Hung Kuan and Yuan-Hao Chang and Tseng-Yi Chen
                 and Po-Chun Huang and Kam-Yiu Lam",
  title =        "Space-Efficient Index Scheme for {PCM}-Based
                 Multiversion Databases in Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "21:1--21:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950060",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we study the indexing problem of
                 using PCM as the storage medium for embedded
                 multiversion databases in cyber-physical systems
                 (CPSs). Although the multiversion B$^+$-tree (MVBT)
                 index has been shown to be efficient in managing
                 multiple versions of data items in a database, MVBT is
                 designed for databases residing in traditional
                 block-oriented storage devices. It can have serious
                 performance problems when the databases are on
                 phase-change memory (PCM). Since the embedded
                 multiversion database in CPSs may have limited storage
                 space and are update intensive, to resolve the problems
                 of MVBT of lack of space efficiency and heavy update
                 cost, we propose a new index scheme, called
                 space-efficient multiversion index (SEMI), to enhance
                 the space utilization and access performance in serving
                 various types of queries. In SEMI, since the number of
                 keys in the database may be small, instead of using a
                 B$^+$-tree index, we propose to use a binary-search tree to
                 organize the index keys. Furthermore, multiple versions
                 of the same data item may be stored consecutively and
                 indexed by a single entry to maximize the space
                 utilization and at the same time to enhance the
                 performance in serving version-range queries.
                 Analytical studies have been conducted on SEMI, and a
                 series of experiments have been performed to evaluate
                 its performance as compared with MVBT under different
                 workloads. The experimental results have demonstrated
                 that SEMI can achieve very high space utilization and
                 has better performance in serving update transactions
                 and range queries as compared with MVBT.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Kartal:2016:MDR,
  author =       "Yusuf Bora Kartal and Ece G{\"u}ran Schmidt and Klaus
                 Werner Schmidt",
  title =        "Modeling Distributed Real-Time Systems in {TIOA} and
                 {UPPAAL}",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "22:1--22:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2964202",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The mission- and life-critical properties of
                 distributed real-time systems require concurrent
                 modeling, analysis, and formal verification in the
                 design stage. The timed input/output automata (TIOA)
                 framework and the UPPAAL software package are two
                 widely used modeling and verification tools for this
                 purpose. To this end, we develop the algorithm
                 TUConvert for converting distributed TIOA models to
                 UPPAAL behavioral models and formally prove its
                 correctness. We demonstrate the applicability of our
                 algorithm by the formal verification of a distributed
                 real-time industrial communication protocol that is
                 modeled by TIOA.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Asyaban:2016:ASB,
  author =       "Sedigheh Asyaban and Mehdi Kargahi and Lothar Thiele
                 and Morteza Mohaqeqi",
  title =        "Analysis and Scheduling of a Battery-Less
                 Mixed-Criticality System with Energy Uncertainty",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "23:1--23:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2964201",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We consider a battery-less real-time embedded system
                 equipped with an energy harvester. It scavenges energy
                 from an environmental resource according to some
                 stochastic patterns. The success of jobs is threatened
                 in the case of energy shortage, which might be due to
                 lack of harvested energy, losses originated from the
                 super-capacitor self-discharge, as well as power
                 consumption of executed tasks. The periodic real-time
                 tasks of the system follow a dual-criticality model. In
                 addition, each task has a minimum required success
                 ratio that needs to be satisfied in steady state. We
                 analytically evaluate the behavior of such a system in
                 terms of its energy-related success ratio for a given
                 schedule. Based on these results, we propose a
                 scheduling algorithm that satisfies both temporal and
                 success-ratio constraints of the jobs, while respecting
                 task criticalities and corresponding system modes. The
                 accuracy of the analytical method as well as its
                 dependence on the numerical computations and other
                 model assumptions are extensively discussed through
                 comparison with simulation results. Also, the efficacy
                 of the proposed scheduling algorithm is studied through
                 comparison to some existing non-mixed- and
                 mixed-criticality scheduling algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Manna:2016:ITS,
  author =       "Kanchan Manna and Shivam Swami and Santanu
                 Chattopadhyay and Indranil Sengupta",
  title =        "Integrated Through-Silicon Via Placement and
                 Application Mapping for {$3$D} Mesh-Based {NoC}
                 Design",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "24:1--24:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2968446",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article proposes a solution to the integrated
                 problem of Through-Silicon Via (TSV) placement and
                 mapping of cores to the routers in a three-dimensional
                 mesh-based Network-on-Chip (NoC) system. TSV geometry
                 restricts their number in three-dimensional (3D) ICs.
                 As a result, only about 25\% of routers in a 3D NoC can
                 possess vertical connections. Mapping plays an
                 important role in evolving good system solutions in
                 such a situation. TSVs have been placed with detailed
                 consultation with the application mapping process. The
                 integrated problem was first solved using the exact
                 method of Integer Linear Programming (ILP). Next, a
                 solution was obtained via a Particle Swarm Optimization
                 (PSO) formulation. Several augmentations to the basic
                 PSO strategy have been proposed to generate
                 good-quality solutions. The results obtained are better
                 than many of the contemporary approaches and close to
                 the theoretical situation in which all routers are 3D
                 in nature.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Tajik:2016:SRS,
  author =       "Hossein Tajik and Bryan Donyanavard and Nikil Dutt and
                 Janmartin Jahn and J{\"o}rg Henkel",
  title =        "{SPMPool}: Runtime {SPM} Management for
                 Memory-Intensive Applications in Embedded Many-Cores",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "25:1--25:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2968447",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Distributed Scratchpad Memories (SPMs) in embedded
                 many-core systems require careful selection of data
                 placement to achieve good performance. Applications
                 mapped to these platforms have varying memory
                 requirements based on their runtime behavior, resulting
                 in under- or overutilization of the local SPMs. We
                 propose SPMPool to share the available on-chip SPMs on
                 many-cores among concurrently executing applications in
                 order to reduce the overall memory access latency. By
                 pooling SPM resources, we can assign underutilized
                 memory resources, due to idle cores or low memory
                 usage, to applications dynamically. SPMPool is the
                 first workload-aware SPM mapping solution for
                 many-cores that dynamically allocates data at
                 runtime---using profiled data---to address the
                 unpredictable set of concurrently executing
                 applications. Our experiments on workloads with varying
                 interapplication memory intensity show that SPMPool can
                 achieve up to 76\% reduction in memory access latency
                 for configurations ranging from 16 to 256 cores,
                 compared to the traditional approach that limits
                 executing cores to use their local SPMs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Schurmans:2016:FAE,
  author =       "Stefan Sch{\"u}rmans and Gereon Onnebrink and Rainer
                 Leupers and Gerd Ascheid and Xiaotao Chen",
  title =        "Frequency-Aware {ESL} Power Estimation for {ARM
                 Cortex-A9} Using a Black Box Processor Model",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "26:1--26:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2987375",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Power estimation has become a strongly desired feature
                 in Electronic System Level (ESL) simulations. Most
                 existing power estimation approaches for this
                 abstraction level require component models with
                 observable internals. However, most ESL models of
                 modern processors are delivered as black box
                 components. This work presents a tool-based ESL power
                 estimation methodology for black box models and its
                 extension for multiple clock frequencies. The
                 evaluation uses hardware measurements of the ARM
                 Cortex-A9 subsystem of the OMAP4460 chip for reference.
                 The achieved estimation error is 5\% on average for
                 fixed-frequency power models and 7\% for multifrequency
                 power models.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Weinstock:2016:PSS,
  author =       "Jan Henrik Weinstock and Luis Gabriel Murillo and
                 Rainer Leupers and Gerd Ascheid",
  title =        "Parallel {SystemC} Simulation for {ESL} Design",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "27:1--27:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2987374",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Virtual platforms have become essential tools for the
                 design of embedded systems. Developers rely on them for
                 design space exploration and software debugging.
                 However, with rising HW/SW complexity and the need to
                 simulate more and more processors simultaneously, the
                 performance of virtual platforms degrades rapidly.
                 Parallel simulation techniques can help to counter this
                 by leveraging multicore PCs, which are widely available
                 today. This work presents a novel parallel simulation
                 approach that is targeted toward acceleration of
                 virtual platforms from the ESL domain. By trading some
                 timing accuracy, multiprocessor virtual platforms can
                 be accelerated by up to $ 3.4 \times $ on regular
                 quad-core workstations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}

@Article{Shukla:2017:ECC,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Continuing the Course",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "28:1--28:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3043965",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Fischmeister:2017:GES,
  author =       "Sebastian Fischmeister and Jason Xue",
  title =        "Guest Editorial: Special Issue on {LCTES 2015}",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "29:1--29:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3041038",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cunha:2017:DSC,
  author =       "Marcos Aur{\'e}lio Pinto Cunha and Omayma Matoussi and
                 Fr{\'e}d{\'e}ric P{\'e}trot",
  title =        "Detecting Software Cache Coherence Violations in
                 {MPSoC} Using Traces Captured on Virtual Platforms",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "30:1--30:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2990193",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Software cache coherence schemes tend to be the
                 solution of choice in dedicated multi/many core systems
                 on chip, as they make the hardware much simpler and
                 predictable. However, despite the developers' effort,
                 it is hard to make sure that all preventive
                 measurements are taken to ensure coherence. In this
                 work, we propose a method to identify the potential
                 cache coherence violations using traces obtained from
                 virtual platforms. These traces contain causality
                 relations among events, which allow first to simplify
                 the analysis, and second to avoid relying on
                 timestamps. Our method identifies potential violations
                 that may occur during a given execution for
                 write-through and write-back cache policies. Therefore,
                 it is independent of the software coherence protocol.
                 We conducted experiments on parallel applications
                 running on a lightweight SMP operating system, and we
                 were able to detect coherence issues that we could then
                 solve.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zheng:2017:DDC,
  author =       "Wenguang Zheng and Hui Wu",
  title =        "Dynamic Data-Cache Locking for Minimizing the {WCET}
                 of a Single Task",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2994602",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Caches have been widely used in modern embedded
                 processors to bridge the increasing speed gap between
                 processors and off-chip memory. In real-time embedded
                 systems, computing the Worst-Case Execution Time (WCET)
                 of a task is essential for the task scheduler to
                 construct a valid schedule for a task set.
                 Unfortunately, caches make it much harder to compute
                 the WCET of a task. Cache locking has been proposed to
                 alleviate the timing unpredictability problem caused by
                 caches. In this article, we investigate the following
                 WCET-aware data-cache locking problem for a single
                 task. Given a task, select a set of variables as locked
                 cache contents such that the WCET of the task is
                 minimized. We propose two dynamic full cache-locking
                 approaches. The first formulates the problem as a
                 global Integer Linear Programming (ILP) problem that
                 simultaneously selects a minimum set of memory blocks
                 of variables as locked cache contents and allocates
                 them to the data cache. The second iteratively
                 constructs a subgraph of the Control Flow Graph (CFG)
                 of the task in which the lengths of all the paths are
                 close to the longest path length, uses an ILP
                 formulation to select a minimum set of memory blocks of
                 variables in the subgraph as locked cache contents, and
                 allocates the selected memory blocks to the data cache.
                 We also propose two novel, efficient data-cache
                 allocation algorithms for the global ILP approach and
                 the iterative ILP approach, respectively. We have
                 implemented both approaches and compared them with two
                 state-of-the-art approaches, the longest path-based
                 dynamic cache-locking approach and the static WCET
                 analysis approach without cache locking by using a set
                 of benchmarks from the M{\"a}lardalen WCET benchmark
                 suite, SNU real-time benchmarks, and Powerstone
                 benchmarks. Compared to the static WCET analysis
                 approach, the average WCET improvements of the first
                 approach range between 11.4\% and 26.4\%. Compared to
                 the longest path--based, dynamic cache-locking
                 approach, the average WCET improvements of the first
                 approach range between 5.0\% and 15.4\%. The second
                 approach performs slightly better than the first
                 approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2017:CDS,
  author =       "Qingrui Liu and Changhee Jung and Dongyoon Lee and
                 Devesh Tiwari",
  title =        "Compiler-Directed Soft Error Detection and Recovery to
                 Avoid {DUE} and {SDC} via {Tail-DMR}",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "32:1--32:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2930667",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents Clover, a compiler-directed soft
                 error detection and recovery scheme for lightweight
                 soft error resilience. The compiler carefully generates
                 soft-error-tolerant code based on idempotent processing
                 without explicit checkpoints. During program execution,
                 Clover relies on a small number of acoustic wave
                 detectors deployed in the processor to identify soft
                 errors by sensing the wave made by a particle strike.
                 To cope with DUEs (detected unrecoverable errors)
                 caused by the sensing latency of error detection,
                 Clover leverages a novel selective instruction
                 duplication technique called tail-DMR (dual modular
                 redundancy) that provides a region-level error
                 containment. Once a soft error is detected by either
                 the sensors or the tail-DMR, Clover takes care of the
                 error as in the case of exception handling. To recover
                 from the error, Clover simply redirects program control
                 to the beginning of the code region where the error is
                 detected. The experimental results demonstrate that the
                 average runtime overhead is only 26\%, which is a 75\%
                 reduction compared to that of the state-of-the-art soft
                 error resilience technique. In addition, this article
                 evaluates an alternative technique called tail-wait,
                 comparing it to Clover. According to the evaluation
                 with the different processor configurations and the
                 various error detection latencies, Clover turns out to
                 be a superior technique, achieving 1.06 to 3.49 $
                 \times $ speedup over the tail-wait.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Procter:2017:PAS,
  author =       "Adam Procter and William L. Harrison and Ian Graves
                 and Michela Becchi and Gerard Allwein",
  title =        "A Principled Approach to Secure Multi-core Processor
                 Design with {ReWire}",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2967497",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "There is no such thing as high assurance without high
                 assurance hardware. High assurance hardware is
                 essential because any and all high assurance systems
                 ultimately depend on hardware that conforms to, and
                 does not undermine, critical system properties and
                 invariants. And yet, high assurance hardware
                 development is stymied by the conceptual gap between
                 formal methods and hardware description languages used
                 by engineers. This article advocates a
                 semantics-directed approach to bridge this conceptual
                 gap. We present a case study in the design of secure
                 processors, which are formally derived via principled
                 techniques grounded in functional programming and
                 equational reasoning. The case study comprises the
                 development of secure single- and dual-core variants of
                 a single processor, both based on a common semantic
                 specification of the ISA. We demonstrate via formal
                 equational reasoning that the dual-core processor
                 respects a ``no-write-down'' information flow policy.
                 The semantics-directed approach enables a modular and
                 extensible style of system design and verification. The
                 secure processors require only a very small amount of
                 additional code to specify and implement, and their
                 security verification arguments are concise and
                 readable. Our approach rests critically on ReWire, a
                 functional programming language providing a suitable
                 foundation for formal verification of hardware designs.
                 This case study demonstrates both ReWire's
                 expressiveness as a programming language and its power
                 as a framework for formal, high-level reasoning about
                 hardware systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2017:ESS,
  author =       "Li-Pin Chang and Po-Han Sung and Po-Tsang Chen and
                 Po-Hung Chen",
  title =        "Eager Synching: a Selective Logging Strategy for Fast
                 {\tt fsync()} on Flash-Based {Android} Devices",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "34:1--34:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2930668",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Flash storage has been a standard component in Android
                 devices. Recent research has reported that application
                 data management in Android involves frequent fsync()
                 operations. The current fsync() implementations,
                 including those of ext4 and F2FS, have several common
                 drawbacks. Specifically, ext4 commits a transaction
                 every time to sync a file, whereas F2FS commits a
                 checkpoint to sync a directory. Committing a
                 transaction or checkpoint flushes all dirty data from
                 the page cache to the flash storage via many small,
                 random block write requests. The resultant high I/O
                 frequency and excessive write traffic cause a high
                 fsync() latency. This study presents an efficient
                 fsync() method, called eager synching, which is based
                 on a simple idea: write less, and write sequentially.
                 To sync a file, eager synching writes only a subset of
                 all dirty data in the page cache to a sequential log
                 space using a few sequential block write requests. It
                 does not involve transaction or checkpoint committing.
                 We successfully implemented eager synching in ext4 and
                 F2FS, and our experimental results show that, compared
                 with the original fsync() methods of ext4 and F2FS,
                 eager synching reduced the average and maximum fsync()
                 latencies by up to 72\% and 91\%, respectively,
                 block-level write traffic by up to 35\%, and I/O
                 frequency by up to 66\%. Through enhanced crash
                 recovery procedures, eager synching can successfully
                 recover all previously synched files while still
                 guaranteeing the file system integrity. We also
                 conducted live application replays using the proposed
                 eager synching approach and observed that this approach
                 significantly improved the application frame updating
                 rate and application execution time.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dietrich:2017:GOF,
  author =       "Christian Dietrich and Martin Hoffmann and Daniel
                 Lohmann",
  title =        "Global Optimization of Fixed-Priority Real-Time
                 Systems by {RTOS}-Aware Control-Flow Analysis",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950053",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cyber--physical systems typically target a dedicated
                 purpose; their embedded real-time control system, such
                 as an automotive control unit, is designed with a
                 well-defined set of functionalities. On the software
                 side, this results in a large amount of implicit and
                 explicit static knowledge about the system and its
                 behavior already at compile time. Compilers have become
                 increasingly better at extracting and exploiting such
                 static knowledge. For instance, many optimizations have
                 been lifted up to the interprocedural or even to the
                 whole-program level. However, whole-program
                 optimizations generally stop at the application--kernel
                 boundary: control-flow transitions between different
                 threads are not yet analyzed. In this article, we cross
                 the application--kernel boundary by combining the
                 semantics of a real-time operating system (RTOS) with
                 deterministic fixed-priority scheduling (e.g.,
                 OSEK/AUTOSAR, ARINC 653, $ \mu $ITRON, POSIX.4) and the
                 explicit application knowledge to enable system-wide,
                 flow-sensitive compiler optimizations. We present two
                 methods to extract a cross-kernel, control-flow--graph
                 that provides a global view on all possible execution
                 paths of a real-time system. Having this knowledge at
                 hand, we tailor the operating system kernel more
                 closely to the particular application scenario. For the
                 example of a real-world safety-critical control system,
                 we present three possible use cases. (1) Runtime
                 optimizations, by means of specialized system calls for
                 each call site, allow one speed up the kernel execution
                 path by 28\% in our benchmark scenario. Furthermore, we
                 target transient hardware fault tolerance with two
                 automated software-based countermeasures: (2)
                 generation of OS state assertions on the expected
                 system behavior, and (3) a system-wide dominator-region
                 based control-flow error detection, both of which
                 leverage significant robustness improvements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2017:MCS,
  author =       "Jing Liu and Kenli Li and Dakai Zhu and Jianjun Han
                 and Keqin Li",
  title =        "Minimizing Cost of Scheduling Tasks on Heterogeneous
                 Multicore Embedded Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2935749",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cost savings are very critical in modern heterogeneous
                 computing systems, especially in embedded systems. Task
                 scheduling plays an important role in cost savings. In
                 this article, we tackle the problem of scheduling tasks
                 on heterogeneous multicore embedded systems with the
                 constraints of time and resources for minimizing the
                 total cost, while considering the communication
                 overhead. This problem is NP-hard and we propose
                 several heuristic techniques---ISGG, RLD, and RLDG---to
                 address the problem. Experimental results show that the
                 proposed algorithms significantly outperform the
                 existing approaches in terms of cost savings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Llopard:2017:FPA,
  author =       "Ivan Llopard and Christian Fabre and Albert Cohen",
  title =        "From a Formalized Parallel Action Language to Its
                 Efficient Code Generation",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "37:1--37:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2990195",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modeling languages propose convenient abstractions and
                 transformations to handle the complexity of today's
                 embedded systems. Based on the formalism of the
                 Hierarchical State Machine, they enable the expression
                 of hierarchical control parallelism. However, they face
                 two important challenges when it comes to modeling
                 data-intensive applications: no unified approach that
                 also accounts for data-parallel actions and no
                 effective code optimization and generation flows. We
                 propose a modeling language extended with parallel
                 action semantics and hierarchical indexed-state
                 machines suitable for computationally intensive
                 applications. Together with its formal semantics, we
                 present an optimizing model compiler aiming for the
                 generation of efficient data-parallel
                 implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Manilov:2017:FRS,
  author =       "Stanislav Manilov and Bj{\"o}rn Franke and Anthony
                 Magrath and Cedric Andrieu",
  title =        "{Free Rider}: a Source-Level Transformation Tool for
                 Retargeting Platform-Specific Intrinsic Functions",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "38:1--38:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2990194",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Short-vector SIMD and DSP instructions are popular
                 extensions to common ISAs. These extensions deliver
                 excellent performance and compact code for some
                 compute-intensive applications, but they require
                 specialized compiler support. To enable the programmer
                 to explicitly request the use of such an instruction,
                 many C compilers provide platform-specific intrinsic
                 functions, whose implementation is handled specially by
                 the compiler. The use of such intrinsics, however,
                 inevitably results in nonportable code. In this
                 article, we develop a novel methodology for retargeting
                 such nonportable code, which maps intrinsics from one
                 platform to another, taking advantage of similar
                 intrinsics on the target platform. We employ a
                 description language to specify the signature and
                 semantics of intrinsics and perform graph-based pattern
                 matching and high-level code transformations to derive
                 optimized implementations exploiting the target's
                 intrinsics, wherever possible. We demonstrate the
                 effectiveness of our new methodology, implemented in
                 the Free Rider tool, by automatically retargeting
                 benchmarks derived from OpenCV samples and a complex
                 embedded application optimized to run on an ARM
                 Cortex-M4 to an Intel Edison module with SSE4.2
                 instructions (and vice versa). We achieve a speedup of
                 up to 3.73 over a plain C baseline, and on average
                 96.0\% of the speedup of manually ported and optimized
                 versions of the benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zeng:2017:SLD,
  author =       "Jing Zeng and Laurence T. Yang and Man Lin and Zili
                 Shao and Dakai Zhu",
  title =        "System-Level Design Optimization for Security-Critical
                 Cyber-Physical-Social Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "39:1--39:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2925991",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cyber-physical-social systems (CPSS), an emerging
                 computing paradigm, have attracted intensive attentions
                 from the research community and industry. We are facing
                 various challenges in designing secure, reliable, and
                 user-satisfied CPSS. In this article, we consider these
                 design issues as a whole and propose a system-level
                 design optimization framework for CPSS design where
                 energy consumption, security-level, and user
                 satisfaction requirements can be fulfilled while
                 satisfying constraints for system reliability.
                 Specifically, we model the constraints (energy
                 efficiency, security, and reliability) as the penalty
                 functions to be incorporated into the corresponding
                 objective functions for the optimization problem. A
                 smart office application is presented to demonstrate
                 the feasibility and effectiveness of our proposed
                 design optimization approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Haar:2017:MGE,
  author =       "Stefan Haar and Roland Meyer",
  title =        "Message from the {Guest Editors}",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "40:1--40:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3037413",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bujtor:2017:TPD,
  author =       "Ferenc Bujtor and Lev Sorokin and Walter Vogler",
  title =        "Testing Preorders for {dMTS}: Deadlock- and the New
                 {Deadlock-\slash Divergence Testing}",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "41:1--41:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2984641",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Testing preorders on component specifications ensure
                 that replacing a specification by a refined one does
                 not introduce unwanted behavior in an overall system.
                 Considering deadlocks as unwanted, the preorder can be
                 characterized by a failure semantics on Labeled
                 Transition Systems (LTSs). In previous work, we have
                 generalized this to Modal Transition Systems (MTSs)
                 with a new, MTS-specific testing idea. In the present
                 article, we generalize this idea further to dMTS, a
                 subclass of disjunctive MTSs. On the one hand, the
                 testing preorder can be characterized by the same
                 failure semantics, and dMTS have no additional
                 expressivity in our setting. On the other hand, the
                 technical treatment is significantly harder and,
                 surprisingly, the preorder is not compositional.
                 Furthermore, we regard deadlocks and divergence
                 (infinite unobservable runs) as unwanted and
                 characterize the testing preorder with an unusual
                 failure-divergence semantics. This preorder is already
                 on LTSs strictly coarser---and hence arguably better---than
                 the traditional failure-divergence preorder. It is a
                 precongruence on dMTS, also for hiding, and much easier
                 to handle than the deadlock-based preorder. It arises
                 as well from a new variant of De Nicola's and
                 Hennessy's must-testing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vijzelaar:2017:MVS,
  author =       "Stefan Vijzelaar and Wan Fokkink",
  title =        "Multi-valued Simulation and Abstraction Using Lattice
                 Operations",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "42:1--42:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3012282",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Abstractions can cause spurious results, which need to
                 be verified in the concrete system to gain conclusive
                 results. Verification based on a multi-valued logic can
                 distinguish between conclusive and inconclusive
                 results, provides increased precision, and allows for
                 encoding additional information into the model. To
                 ensure a correct abstraction, one can use a mixed
                 simulation [Meller et al. 2009]. We extend mixed
                 simulation to include inconsistent values, thereby
                 resolving an asymmetry and allowing for abstractions
                 with increased precision when inconsistent values are
                 available. In addition, we present a set of abstraction
                 rules, compatible with the extended notion, for
                 constructing abstract models.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Andre:2017:PPO,
  author =       "{\'E}tienne Andr{\'e} and Thomas Chatain and C{\'e}sar
                 Rodr{\'\i}guez",
  title =        "Preserving Partial-Order Runs in Parametric Time
                 {Petri} Nets",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "43:1--43:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3012283",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Parameter synthesis for timed systems aims at deriving
                 parameter valuations satisfying a given property. In
                 this article, we target concurrent systems. We use
                 partial-order semantics for parametric time Petri nets
                 as a way to both cope with the well-known state-space
                 explosion due to concurrency and significantly enhance
                 the result of an existing synthesis algorithm. Given a
                 reference parameter valuation, our approach synthesizes
                 other valuations preserving the partial-order
                 executions of the reference parameter valuation. We
                 show the applicability of our approach using a tool
                 applied to asynchronous circuits.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Berard:2017:NIP,
  author =       "B{\'e}atrice B{\'e}rard and Lo{\"\i}c H{\'e}lou{\"e}t
                 and John Mullins",
  title =        "Non-interference in Partial Order Models",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "44:1--44:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2984639",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Non-interference (NI) is a property of systems stating
                 that confidential actions should not cause effects
                 observable by unauthorized users. Several variants of
                 NI have been studied for many types of models but
                 rarely for true concurrency or unbounded models. This
                 work investigates NI for High-level Message Sequence
                 Charts (HMSCs), a scenario language for the description
                 of distributed systems, based on composition of partial
                 orders. We first propose a general definition of
                 security properties in terms of equivalence among
                 observations of behaviors. Observations are naturally
                 captured by partial order automata, a formalism that
                 generalizes HMSCs and permits assembling partial
                 orders. We show that equivalence or inclusion
                 properties for HMSCs (and hence for partial order
                 automata) are undecidable, which means in particular
                 that NI is undecidable for HMSCs. We hence consider
                 decidable subclasses of partial order automata and
                 HMSCs. Finally, we define weaker local properties,
                 describing situations where a system is attacked by a
                 single agent, and show that local NI is decidable. We
                 then refine local NI to a finer notion of causal NI
                 that emphasizes causal dependencies between
                 confidential actions and observations and extend it to
                 causal NI with (selective) declassification of
                 confidential events. Checking whether a system
                 satisfies local and causal NI and their declassified
                 variants are PSPACE-complete problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Saarikivi:2017:MTS,
  author =       "Olli Saarikivi and Hern{\'a}n Ponce-De-Le{\'o}n and
                 Kari K{\"a}hk{\"o}nen and Keijo Heljanko and Javier
                 Esparza",
  title =        "Minimizing Test Suites with Unfoldings of
                 Multithreaded Programs",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "45:1--45:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3012281",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article focuses on computing minimal test suites
                 for multithreaded programs. Based on previous work on
                 test case generation for multithreaded programs using
                 unfoldings, this article shows how this unfolding can
                 be used to generate minimal test suites covering all
                 local states of the program. Generating such minimal
                 test suites is shown to be NP-complete in the size of
                 the unfolding. We propose an SMT encoding for this
                 problem and two methods based on heuristics which only
                 approximate the solution, but scale better in practice.
                 Finally, we apply our methods to compute the minimal
                 test suites for several benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Valmari:2017:SIS,
  author =       "Antti Valmari",
  title =        "Stop It, and Be Stubborn!",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "46:1--46:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3012279",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This publication discusses how automatic verification
                 of concurrent systems can be made more efficient by
                 focusing on always may-terminating systems. First,
                 making a system always may-terminating is a method for
                 meeting a modelling need that exists independently of
                 this publication. It is illustrated that without doing
                 so, non-progress errors may be lost. Second, state
                 explosion is often alleviated with stubborn, ample, and
                 persistent set methods. They use expensive cycle or
                 terminal strong component conditions in many cases. It
                 is proven that for many important classes of
                 properties, if the systems are always may-terminating,
                 then these conditions can be left out.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Isenberg:2017:IIV,
  author =       "Tobias Isenberg",
  title =        "Incremental Inductive Verification of Parameterized
                 Timed Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "47:1--47:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2984640",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We propose and extend an approach for the verification
                 of safety properties for parameterized timed systems
                 modeled as networks of timed automata. For this task,
                 we introduce an incremental workflow that is based on
                 our algorithm IC3 with Zones. It proceeds in a cycle in
                 which single models of the system are verified, and the
                 verification results are employed for the reasoning
                 about the entire system. Starting with the smallest
                 instances, the verification of the safety property is
                 carried out fast and efficient. On successful
                 verification, the algorithm produces an inductive
                 strengthening of the safety property. We reuse this
                 result and try to reason about the entire parameterized
                 timed system. To this end, we extrapolate the inductive
                 strengthening into a candidate for the next-larger
                 model. In case this candidate is a valid inductive
                 strengthening for the next larger model, our main
                 theorem reasons about all models of the parameterized
                 timed system, stating that the safety property holds
                 true for all models. Otherwise, the main cycle starts
                 over with the verification of the next larger model.
                 This workflow is iterated indefinitely, until able to
                 reason about the entire parameterized timed system,
                 until a counterexample trace is found, or until the
                 single models become too large to be handled in the
                 verification. We reuse the intermediate results in a
                 Feedback-loop in order to accelerate the verification
                 runs for the single models. Furthermore, we consider an
                 extended formalism in comparison to our previous
                 publications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Siirtola:2017:WDW,
  author =       "Antti Siirtola and Stavros Tripakis and Keijo
                 Heljanko",
  title =        "When Do We Not Need Complex Assume-Guarantee Rules?",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "48:1--48:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3012280",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We study the need for complex circular
                 assume-guarantee (AG) rules in formalisms that already
                 provide the simple precongruence rule. We first
                 investigate the question for two popular formalisms:
                 Labeled Transition Systems (LTSs) with weak simulation
                 and Interface Automata (IA) with alternating
                 simulation. We observe that, in LTSs, complex circular
                 AG rules cannot always be avoided, but, in the IA
                 world, the simple precongruence rule is all we need.
                 Based on these findings, we introduce modal IA with cut
                 states, a novel formalism that not only generalizes IA
                 and LTSs but also allows for compositional reasoning
                 without complex AG rules.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tang:2017:TFC,
  author =       "Qi Tang and Twan Basten and Marc Geilen and Sander
                 Stuijk and Ji-Bo Wei",
  title =        "{Task-FIFO} Co-Scheduling of Streaming Applications on
                 {MPSoCs} with Predictable Memory Hierarchy",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "49:1--49:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3038484",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article studies the scheduling of real-time
                 streaming applications on multiprocessor
                 systems-on-chips with predictable memory hierarchy. An
                 iteration-based task-FIFO co-scheduling framework is
                 proposed for this problem. We obtain FIFO size
                 distributions using Pareto space searching, based on
                 which the task-to-processor mapping is obtained with
                 the potential FIFO allocation being taken into account;
                 then, the FIFO-to-memory allocation is optimized to
                 minimize the total memory access cost; finally, a
                 self-timed throughput analysis method that considers
                 memory and direct memory access controller contention
                 is utilized to analyze the throughput. Our methods are
                 validated by a set of synthesized and practical
                 applications on different platforms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Choo:2017:EDF,
  author =       "Kim-Kwang Raymond Choo and Yunsi Fei and Yang Xiang
                 and Yu Yu",
  title =        "Embedded Device Forensics and Security",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "50:1--50:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3015662",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "While the increasing digitalization of our society and
                 amalgamation of embedded devices into the
                 ever-increasing facets of our daily life (e.g., in
                 smart and intelligent vehicles, smart cities and smart
                 nations, and critical infrastructure sectors) have
                 resulted in improved productivity and quality of life,
                 the trend has also resulted in a trend of increasing
                 frequency and sophistication of cyber exploitation and
                 cyber threats. Hence, there is a need for coordinated
                 efforts from the research community to address
                 resulting concerns using both cryptographic and
                 non-cryptographic solutions, such as those presented in
                 this special section.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Puthal:2017:DDK,
  author =       "Deepak Puthal and Surya Nepal and Rajiv Ranjan and
                 Jinjun Chen",
  title =        "{DLSeF}: a Dynamic Key-Length-Based Efficient
                 Real-Time Security Verification Model for Big Data
                 Stream",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "51:1--51:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2937755",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Applications in risk-critical domains such as
                 emergency management and industrial control systems
                 need near-real-time stream data processing in
                 large-scale sensing networks. The key problem is how to
                 ensure online end-to-end security (e.g.,
                 confidentiality, integrity, and authenticity) of data
                 streams for such applications. We refer to this as an
                 online security verification problem. Existing data
                 security solutions cannot be applied in such
                 applications as they cannot deal with data streams with
                 high-volume and high-velocity data in real time. They
                 introduce a significant buffering delay during security
                 verification, resulting in a requirement for a large
                 buffer size for the stream processing server. To
                 address this problem, we propose a Dynamic
                 Key-Length-Based Security Framework (DLSeF) based on a
                 shared key derived from synchronized prime numbers; the
                 key is dynamically updated at short intervals to thwart
                 potential attacks to ensure end-to-end security.
                 Theoretical analyses and experimental results of the
                 DLSeF framework show that it can significantly improve
                 the efficiency of processing stream data by reducing
                 the security verification time and buffer usage without
                 compromising security.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Song:2017:SSI,
  author =       "Jun Song and Fan Yang and Kim-Kwang Raymond Choo and
                 Zhijian Zhuang and Lizhe Wang",
  title =        "{SIPF}: a Secure Installment Payment Framework for
                 Drive-Thru {Internet}",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "52:1--52:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3014584",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Ensuring the security and privacy of vehicular ad hoc
                 networks (VANETs) and related services such as secure
                 payment has been the focus of recent research efforts.
                 Existing secure payment solutions generally require
                 stable and reliable network connection. This is,
                 however, a challenge in a VANET setting. Drive-thru
                 Internet, a secure payment solution for VANETs,
                 involves a great number of fast-moving vehicles
                 competing for connections/communications
                 simultaneously. Thus, service providers may find it
                 challenging to provide real-time payment services or
                 may have to sacrifice the confidentiality and the
                 authenticity of payment vouchers for usability. In this
                 article, we propose a secure installment payment
                 framework for drive-thru Internet deployment in a VANET
                 setting. The framework also provides the capability to
                 embody properties such as confidentiality of payment
                 vouchers, offline signature verification, periodical
                 reconciliation, and installment payment. Performance
                 evaluation and security analysis demonstrate the
                 utility of the framework in a VANET setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "52",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2017:EEC,
  author =       "Zhe Liu and Jian Weng and Zhi Hu and Hwajeong Seo",
  title =        "Efficient Elliptic Curve Cryptography for Embedded
                 Devices",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "53:1--53:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2967103",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many resource-constrained embedded devices, such as
                 wireless sensor nodes, require public key encryption or
                 a digital signature, which has induced plenty of
                 research on efficient and secure implementation of
                 elliptic curve cryptography (ECC) on 8-bit processors.
                 In this work, we study the suitability of a special
                 class of finite fields, called optimal prime fields
                 (OPFs), for a ``lightweight'' ECC implementation with a
                 view toward high performance and security. First, we
                 introduce a highly optimized arithmetic library for
                 OPFs that includes two implementations for each finite
                 field arithmetic operation, namely a
                 performance-optimized version and a security-optimized
                 variant. The latter is resistant against simple power
                 analysis attacks in the sense that it always executes
                 the same sequence of instructions, independent of the
                 operands. Based on this OPF library, we then describe a
                 performance-optimized and a security-optimized
                 implementation of scalar multiplication on the elliptic
                 curve over OPFs at several security levels. The former
                 uses the Gallant-Lambert-Vanstone method on twisted
                 Edwards curves and reaches an execution time of 3.14M
                 cycles (over a 160-bit OPF) on an 8-bit ATmega128
                 processor, whereas the latter is based on a Montgomery
                 curve and executes in 5.53M cycles.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "53",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Fu:2017:DFA,
  author =       "Shan Fu and Guoai Xu and Juan Pan and Zongyue Wang and
                 An Wang",
  title =        "Differential Fault Attack on {ITUbee} Block Cipher",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "54:1--54:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2967610",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Differential Fault Attack (DFA) is a powerful
                 cryptanalytic technique to retrieve secret keys by
                 exploiting the faulty ciphertexts generated during
                 encryption procedure. This article proposes a novel DFA
                 attack that is effective on ITUbee, a software-oriented
                 block cipher for resource-constrained devices.
                 Different from other DFA, our attack makes use of not
                 only faulty values, but also differences between
                 fault-free intermediate values corresponding to 2
                 plaintexts, which combine traditional differential
                 analysis with DFA. The possible injection positions
                 with different number of faults are discussed. The most
                 efficient attack takes 2$^{25}$ round function
                 operations with 4 faults, which is achieved in a few
                 seconds on a PC.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2017:RNF,
  author =       "Yang Li and Mengting Chen and Zhe Liu and Jian Wang",
  title =        "Reduction in the Number of Fault Injections for Blind
                 Fault Attack on {SPN} Block Ciphers",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "55:1--55:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3014583",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In 2014, a new fault analysis called blind fault
                 attack (BFA) was proposed, in which attackers can only
                 obtain the number of different faulty outputs without
                 knowing the public data. The original BFA requires
                 480,000 fault injections to recover a 128-bit AES key.
                 This work attempts to reduce the number of fault
                 injections under the same attack assumptions. We
                 analyze BFA from an information theoretical perspective
                 and introduce a new probability-based distinguisher.
                 Three approaches are proposed for different attack
                 scenarios. The best one realized a 66.8\% reduction of
                 the number of fault injections on AES.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Castiglione:2017:BFI,
  author =       "Arcangelo Castiglione and Raffaele Pizzolante and
                 Francesco Palmieri and Barbara Masucci and Bruno
                 Carpentieri and Alfredo {De Santis} and Aniello
                 Castiglione",
  title =        "On-Board Format-Independent Security of Functional
                 Magnetic Resonance Images",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "56:1--56:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2893474",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Functional magnetic resonance imaging (fMRI) provides
                 an effective and noninvasive tool for researchers to
                 understand cerebral functions and correlate them with
                 brain activities. In addition, with the ever-increasing
                 diffusion of the Internet, such images may be exchanged
                 in several ways, allowing new research and medical
                 services. On the other hand, ensuring the security of
                 exchanged fMRI data becomes a main concern due to their
                 special characteristics arising from strict ethics and
                 legislative and diagnostic implications. Again, the
                 risks increase when dealing with open environments like
                 the Internet. For this reason, security mechanisms that
                 ensure protection of such data are strongly required.
                 However, we remark that the mechanisms commonly
                 employed for data protection are doomed to fail when
                 dealing with imaging data. In this article, we propose
                 a novel watermarking scheme explicitly addressed for
                 this type of imaging. Such a scheme can be used for
                 several purposes, particularly to ensure authenticity
                 and integrity. Moreover, we show how to integrate our
                 scheme within commercial off-the-shelf fMRI system.
                 Finally, the validity and the efficiency of our scheme
                 has been assessed through testing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2017:PMH,
  author =       "Jianghua Liu and Jinhua Ma and Wei Wu and Xiaofeng
                 Chen and Xinyi Huang and Li Xu",
  title =        "Protecting Mobile Health Records in Cloud Computing: a
                 Secure, Efficient, and Anonymous Design",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "57:1--57:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2983625",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Electronic healthcare (eHealth) systems have replaced
                 traditional paper-based medical systems due to
                 attractive features such as universal accessibility,
                 high accuracy, and low cost. As a major constituent
                 part of eHealth systems, mobile healthcare (mHealth)
                 applies Mobile Internet Devices (MIDs) and Embedded
                 Devices (EDs), such as tablets, smartphones, and other
                 devices embedded in the bodies of individuals, to
                 improve the quality of life and provide more convenient
                 healthcare services for patients. Unfortunately, MIDs
                 and EDs have only limited computational capacity,
                 storage space, and power supply. By taking this into
                 account, we present a new design to guarantee the
                 integrity of eHealth records and the anonymity of the
                 data owner in a more efficient and flexible way. The
                 essence of our design is a general method which can
                 convert any secure Attribute-Based Signature (ABS)
                 scheme into a highly efficient and secure
                 Online/Offline Attribute-Based Signature (OOABS)
                 scheme. We prove the security and analyze the
                 efficiency improvement of the new design. Additionally,
                 we illustrate the proposed generic construction by
                 applying it to a specific ABS scheme.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2017:SRS,
  author =       "Wei Wang and Peng Xu and Laurence Tianruo Yang and
                 Willy Susilo and Jinjun Chen",
  title =        "Securely Reinforcing Synchronization for Embedded
                 Online Contests",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "58:1--58:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2899000",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "When competing in eBay bidding, online games, or
                 e-exams in embedded computing environments, people
                 naturally face asynchronous starts from different
                 computing devices, which is treated as a security risk
                 of online contests. The security risks of online
                 contests also include eavesdropping during data
                 transmission without intended rights, and false starts
                 by malicious competitors, which also means asynchrony
                 in contests. Accordingly, online contests need security
                 guarantees, especially on synchronization. In this
                 article, for synchronic and secure starts in a contest,
                 we update security requirements of confidentiality,
                 anonymity, and synchrony, comparing the current work to
                 our previous work. Based on the updated requirements,
                 we propose a general framework for the Advanced Secure
                 Synchronized Reading (ASSR) system, which can hold
                 multiple contests simultaneously in the cloud. It is
                 important to note that the system can ignore the
                 impacts of heterogeneity among competitors. Considering
                 the heterogeneity both on transmission and computing,
                 we construct a novel Randomness-reused Identity Based
                 Key Encapsulation Mechanism (RIBKEM) to support
                 separable decapsulation, which can shorten both
                 decryption delay and transmission delay with the best
                 efforts. Finally, ASSR enhances synchronization
                 achievement for contest starts with heterogeneous
                 delays of competitors while satisfying other security
                 requirements. As a complement, the analysis on the
                 provable security of ASSR is given, as well as a
                 further analysis on the achievement of
                 synchronization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mozaffari-Kermani:2017:FDA,
  author =       "Mehran Mozaffari-Kermani and Reza Azarderakhsh and
                 Anita Aghaie",
  title =        "Fault Detection Architectures for Post-Quantum
                 Cryptographic Stateless Hash-Based Secure Signatures
                 Benchmarked on {ASIC}",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "59:1--59:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2930664",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Symmetric-key cryptography can resist the potential
                 post-quantum attacks expected with the not-so-faraway
                 advent of quantum computing power. Hash-based,
                 code-based, lattice-based, and multivariate-quadratic
                 equations are all other potential candidates, the merit
                 of which is that they are believed to resist both
                 classical and quantum computers, and applying ``Shor's
                 algorithm''---the quantum-computer discrete-logarithm
                 algorithm that breaks classical schemes---to them is
                 infeasible. In this article, we propose, assess, and
                 benchmark reliable constructions for stateless
                 hash-based signatures. Such architectures are believed
                 to be one of the prominent post-quantum schemes,
                 offering security proofs relative to plausible
                 properties of the hash function; however, it is well
                 known that their confidentiality does not guarantee
                 reliable architectures in the presence natural and
                 malicious faults. We propose and benchmark fault
                 diagnosis methods for this post-quantum cryptography
                 variant through case studies for hash functions and
                 present the simulations and implementations results
                 (through application-specific integrated circuit
                 evaluations) to show the applicability of the presented
                 schemes. The proposed approaches make such hash-based
                 constructions more reliable against natural faults and
                 help protecting them against malicious faults and can
                 be tailored based on the resources available and for
                 different reliability objectives.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gai:2017:SES,
  author =       "Keke Gai and Longfei Qiu and Min Chen and Hui Zhao and
                 Meikang Qiu",
  title =        "{SA--EAST}: Security-Aware Efficient Data Transmission
                 for {ITS} in Mobile Heterogeneous Cloud Computing",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "60:1--60:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2979677",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The expected advanced network explorations and the
                 growing demand for mobile data sharing and transferring
                 have driven numerous novel applications in
                 Cyber-Physical Systems (CPSs), such as Intelligent
                 Transportation Systems (ITSs). However, current ITS
                 implementations are restricted by the conflicts between
                 security and communication efficiency. Focusing on this
                 issue, this article proposes a Security-Aware Efficient
                 Data Sharing and Transferring (SA-EAST) model, which is
                 designed for securing cloud-based ITS implementations.
                 In applying this approach, we aim to obtain secure
                 real-time multimedia data sharing and transferring. Our
                 experimental evaluation has shown that our proposed
                 model provides an effective performance in securing
                 communications for ITS.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "60",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shu:2017:WDD,
  author =       "Junliang Shu and Yuanyuan Zhang and Juanru Li and
                 Bodong Li and Dawu Gu",
  title =        "Why Data Deletion Fails? {A} Study on Deletion Flaws
                 and Data Remanence in {Android} Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "2",
  pages =        "61:1--61:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3007211",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Smart mobile devices are becoming the main vessel of
                 personal privacy information. While they carry valuable
                 information, data erasure is somehow much more
                 vulnerable than was predicted. The security mechanisms
                 provided by the Android system are not flexible enough
                 to thoroughly delete sensitive data. In addition to the
                 weakness among several provided data-erasing and
                 file-deleting mechanisms, we also target the Android OS
                 design flaws in data erasure, and unveil that the
                 design of the Android OS contradicts some secure
                 data-erasure demands. We present the data-erasure flaws
                 in three typical scenarios on mainstream Android
                 devices, such as the data clearing flaw, application
                 uninstallation flaw, and factory reset flaw. Some of
                 these flaws are inherited data-deleting security issues
                 from the Linux kernel, and some are new vulnerabilities
                 in the Android system. Those scenarios reveal the data
                 leak points in Android systems. Moreover, we reveal
                 that the data remanence on the disk is rarely affected
                 by the user's daily operation, such as file deletion
                 and app installation and uninstallation, by a
                 real-world data deletion latency experiment. After one
                 volunteer used the Android phone for 2 months, the data
                 remanence amount was still considerable. Then, we
                 proposed DataRaider for file recovering from disk
                 fragments. It adopts a file-carving technique and is
                 implemented as an automated sensitive information
                 recovering framework. DataRaider is able to extract
                 private data in a raw disk image without any file
                 system information, and the recovery rate is
                 considerably high in the four test Android phones. We
                 propose some mitigation for data remanence issues, and
                 give the users some suggestions on data protection in
                 Android systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2017:ECS,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Cyber Security, {IoT}, Block Chains---Risks
                 and Opportunities",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "62:1--62:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3087913",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "62",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wolf:2017:GES,
  author =       "Marilyn Wolf and Jason Xue",
  title =        "Guest Editorial: Special Issue on Embedded Computing
                 for {IoT}",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "63:1--63:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3065713",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "63",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ateniese:2017:LCS,
  author =       "Giuseppe Ateniese and Giuseppe Bianchi and Angelo T.
                 Capossele and Chiara Petrioli and Dora Spenza",
  title =        "Low-Cost Standard Signatures for Energy-Harvesting
                 Wireless Sensor Networks",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "64:1--64:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2994603",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This work is motivated by a general question: can
                 micro-scale energy-harvesting techniques be exploited
                 to support low-cost standard security solutions on
                 resource-constrained devices? We focus on guaranteeing
                 integrity and authentication in Internet of Things
                 (IoT) and Wireless Sensor Network (WSN) applications.
                 In this article, we propose techniques to make ECDSA
                 signatures low cost and implementable on
                 resource-constrained devices. By combining
                 precomputation techniques and energy-harvesting
                 capabilities of modern sensor nodes, we achieve
                 significant improvement over prior works. In addition,
                 we show that the cost of ECDSA signatures can be
                 reduced by up to a factor 10 by using harvesting-aware
                 optimizations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jayakumar:2017:EAM,
  author =       "Hrishikesh Jayakumar and Arnab Raha and Jacob R.
                 Stevens and Vijay Raghunathan",
  title =        "Energy-Aware Memory Mapping for Hybrid {FRAM--SRAM}
                 {MCUs} in Intermittently-Powered {IoT} Devices",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "65:1--65:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2983628",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Forecasts project that by 2020, there will be around
                 50 billion devices connected to the Internet of Things
                 (IoT), most of which will operate untethered and
                 unplugged. While environmental energy harvesting is a
                 promising solution to power these IoT edge devices, it
                 introduces new complexities due to the unreliable
                 nature of ambient energy sources. In the presence of an
                 unreliable power supply, frequent checkpointing of the
                 system state becomes imperative, and recent research
                 has proposed the concept of in-situ checkpointing by
                 using ferroelectric RAM (FRAM), an emerging
                 non-volatile memory technology, as unified memory in
                 these systems. Even though an entirely FRAM-based
                 solution provides reliability, it is energy inefficient
                 compared to SRAM due to the higher access latency of
                 FRAM. On the other hand, an entirely SRAM-based
                 solution is highly energy efficient but is unreliable
                 in the face of power loss. This paper advocates an
                 intermediate approach in hybrid FRAM-SRAM
                 microcontrollers that involves judicious memory mapping
                 of program sections to retain the reliability benefits
                 provided by FRAM while performing almost as efficiently
                 as an SRAM-based system. We propose an energy-aware
                 memory mapping technique that maps different program
                 sections to the hybrid FRAM-SRAM microcontroller such
                 that energy consumption is minimized without
                 sacrificing reliability. Our technique consists of
                 eM-map, which performs a one-time characterization to
                 find the optimal memory map for the functions that
                 constitute a program and energy-align, a novel
                 hardware-software technique that aligns the system's
                 powered-on time intervals to function execution
                 boundaries, which results in further improvements in
                 energy efficiency and performance. Experimental results
                 obtained using the MSP430FR5739 microcontroller
                 demonstrate a significant performance improvement of up
                 to 2x and energy reduction of up to 20\% over a
                 state-of-the-art FRAM-based solution. Finally, we
                 present a case study that shows the implementation of
                 our techniques in the context of a real IoT
                 application.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "65",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tiloca:2017:ADB,
  author =       "Marco Tiloca and Kirill Nikitin and Shahid Raza",
  title =        "{Axiom}: {DTLS}-Based Secure {IoT} Group
                 Communication",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "66:1--66:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047413",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents Axiom, a DTLS-based approach to
                 efficiently secure multicast group communication among
                 IoT-constrained devices. Axiom provides an adaptation
                 of the DTLS record layer, relies on key material
                 commonly shared among the group members, and does not
                 require one to perform any DTLS handshake. We made a
                 proof-of-concept implementation of Axiom based on the
                 tinyDTLS library for the Contiki OS and used it to
                 experimentally evaluate performance of our approach on
                 real IoT hardware. Results show that Axiom is
                 affordable on resource-constrained platforms and
                 performs significantly better than related alternative
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chatterjee:2017:PBS,
  author =       "Urbi Chatterjee and Rajat Subhra Chakraborty and
                 Debdeep Mukhopadhyay",
  title =        "A {PUF}-Based Secure Communication Protocol for
                 {IoT}",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "67:1--67:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3005715",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Security features are of paramount importance for the
                 Internet of Things (IoT), and implementations are
                 challenging given the resource-constrained IoT setup.
                 We have developed a lightweight identity-based
                 cryptosystem suitable for IoT to enable secure
                 authentication and message exchange among the devices.
                 Our scheme employs a Physically Unclonable Function
                 (PUF) to generate the public identity of each device,
                 which is used as the public key for each device for
                 message encryption. We have provided formal proofs of
                 security in the Session Key Security and Universally
                 Composable Framework of the proposed protocol, which
                 demonstrates the resilience of the scheme against
                 passive and active attacks. We have demonstrated the
                 setup required for the protocol implementation and
                 shown that the proposed protocol implementation incurs
                 low hardware and software overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2017:PSSa,
  author =       "Anfeng Liu and Xiao Liu and Zhipeng Tang and Laurence
                 T. Yang and Zili Shao",
  title =        "Preserving Smart Sink-Location Privacy with Delay
                 Guaranteed Routing Scheme for {WSNs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "68:1--68:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2990500",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A Semi Random Circle routing for mobile Sink joint Ray
                 Routing for data (SRCRR) scheme is proposed for
                 preserving sink-location privacy with a delay
                 guaranteed. In the SRCRR scheme, the data are
                 directionally routed along ray paths and stored at
                 intermediate nodes probabilistically. The Sink moves in
                 a semirandom circular pattern to collect data from the
                 local nodes occasionally, which guarantees that the
                 data will be collected with an acceptable delay and
                 prevents attackers from predicting their locations and
                 movements. The experimental results indicate that the
                 performance of the SRCRR scheme is better than that of
                 the previous schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bennett:2017:DDS,
  author =       "Terrell R. Bennett and Nicholas Gans and Roozbeh
                 Jafari",
  title =        "Data-Driven Synchronization for {Internet-of-Things}
                 Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "69:1--69:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2983627",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The Internet of Things (IoT) is fueled by the growth
                 of sensors, actuators, and services that collect and
                 process raw sensor data. Wearable and environmental
                 sensors will be a major component of the IoT and
                 provide context about people and activities that are
                 occurring. It is imperative that sensors in the IoT are
                 synchronized, which increases the usefulness and value
                 of the sensor data and allows data from multiple
                 sources to be combined and compared. Due to the
                 heterogeneous nature of sensors (e.g., synchronization
                 protocols, communication channels, etc.),
                 synchronization can be difficult. In this article, we
                 present novel techniques for synchronizing data from
                 multi-sensor environments based on the events and
                 interactions measured by the sensors. We present
                 methods to determine which interactions can likely be
                 used for synchronization and methods to improve
                 synchronization by removing erroneous synchronization
                 points. We validate our technique through experiments
                 with wearable and environmental sensors in a laboratory
                 environment. Experiments resulted in median drift error
                 reduction from 66\% to 98\% for sensors synchronized
                 through physical interactions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Shen:2017:MQC,
  author = {Zhaoyan Shen and Zhijian He and Shuai Li and Qixin
    Wang and Zili Shao},
  title = {A Multi-Quadcopter Cooperative Cyber-Physical System
    for Timely Air Pollution Localization},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {70:1--70:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/3005716},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We propose a cyber-physical system of unmanned
    quadcopters to locate air pollution sources in a timely
    manner. The system consists of a physical part and a
    cyber part. The physical part includes unmanned
    quadcopters equipped with multiple sensors. The cyber
    part carries out control laws. We simplify the control
    laws by decoupling the quadcopters' horizontal-plane
    motion control from vertical motion control. To control
    the quadcopter's horizontal-plane motions, we propose a
    controller that combines pollutant dynamics with
    quadcopter physics. To control the quadcopter's
    vertical motions, we adopt an anti-windup
    proportional-integral (PI) controller. We further
    extend the horizontal-plane control laws from a single
    quadcopter to multiple quadcopters. The
    multi-quadcopter control laws are distributed and
    convergent. We implement a prototype quadcopter and
    carry out experiments to verify the vertical control
    laws. We also carry out simulations to evaluate the
    horizontal-plane control laws. With quadcopter
    parameters set commensurate with our prototype
    implementation's, our simulations show that the control
    laws can drive quadcopters to locate pollution
    source(s) in a timely way.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {70},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

@article{Wu:2017:SVA,
  author = {Jian Wu and Roozbeh Jafari},
  title = {Seamless Vision-assisted Placement Calibration for
    Wearable Inertial Sensors},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {71:1--71:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/3023364},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Wearable inertial devices are being widely used in the
    applications of activity tracking, health care, and
    professional sports, and their usage is on a rapid
    rise. Signal processing algorithms for these devices
    are often designed to work with a known location of the
    wearable sensor on the body. However, in reality, the
    wearable sensor may be worn at different body locations
    due to the user's preference or unintentional
    misplacement. The calibration of the sensor location is
    important to ensure that the algorithms operate
    correctly. In this article, we propose an
    auto-calibration technique for determining the location
    of wearables on the body by fusing the 3-axis
    accelerometer data from the devices and
    three-dimensional camera (i.e., Kinect) information
    obtained from the environment. The automatic
    calibration is achieved by a cascade
    decision-tree-based classifier on top of the minimum
    least-squares errors obtained by solving Wahba's
    problem, operating on heterogeneous sensors. The core
    contribution of our work is that there is no extra
    burden on the user as a result of this technique. The
    calibration is done seamlessly, leveraging sensor
    fusion in an Internet-of-Things setting
    opportunistically when the user is present in front of
    an environmental camera performing arbitrary movements.
    Our approach is evaluated with two different types of
    movements: simple actions (e.g., sit-to-stand or
    picking up phone) and complicated tasks (e.g., cooking
    or playing basketball), yielding 100\% and 82.56\%
    recall for simple actions and for complicated tasks,
    respectively, in determining the correct location of
    sensors.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {71},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

@article{Chen:2017:GEA,
  author = {Bo-Wei Chen and Wen Ji and Zhu Li},
  title = {Guest Editorial for {ACM TECS} Special Issue on
    Effective Divide-and-Conquer, Incremental, or
    Distributed Mechanisms of Embedded Designs for
    Extremely Big Data in Large-Scale Devices},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {72:1--72:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/3068457},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {72},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

@article{Liu:2017:DMR,
  author = {Anfeng Liu and Xiao Liu and Tianyi Wei and Laurence T.
    Yang and Seungmin (Charlie) Rho and Anand Paul},
  title = {Distributed Multi-Representative Re-Fusion Approach
    for Heterogeneous Sensing Data Collection},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {73:1--73:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/2974021},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {A multi-representative re-fusion (MRRF) approximate
    data collection approach is proposed in which multiple
    nodes with similar readings form a data coverage set
    (DCS). The reading value of the DCS is represented by
    an R-node. The set near the Sink is smaller, while the
    set far from the Sink is larger, which can reduce the
    energy consumption in hotspot areas. Then, a
    distributed data-aggregation strategy is proposed that
    can re-fuse the value of R-nodes that are far from each
    other but have similar readings. Both comprehensive
    theoretical and experimental results indicate that the
    MRRF approach increases lifetime and energy
    efficiency.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {73},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

@article{Chen:2017:LBD,
  author = {Xiaogang Chen and Z. Jane Wang and Xiangyang Ji},
  title = {A Load-Balancing Divide-and-Conquer {SVM} Solver},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {74:1--74:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/3005347},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Scaling up kernel support vector machine (SVM)
    training has been an important topic in recent years.
    Despite its theoretical elegance, training kernel SVM
    is impractical when facing millions of data. The
    divide-and-conquer (DC) strategy is a natural framework
    of handling gigantic problems, and the
    divide-and-conquer solver for kernel SVM (DC-SVM) is
    able to train kernel SVM with millions of data with
    limited time cost. However, there are some drawbacks of
    the DC-SVM approach. First, it used an unsupervised
    clustering method to partition the whole problem, which
    is prone to construct singular subsets, and, second, it
    is hard to balance the computation load between
    sub-problems. To address these issues, this article
    proposed a load-balancing partition method for kernel
    SVM. First, it clusters sample from one class and then
    assigns data samples to the cluster centers by a
    distance measure and construct sub-problems; in this
    way, it is able to control the computation load and
    avoid singular problems. Experimental results show that
    the proposed method has better load-balancing
    performance than DC-SVM, which implies that it is
    suitable for distributed and embedding systems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {74},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

@article{Liu:2017:PSSb,
  author = {Bo Liu and Xiao-Tong Yuan and Yang Yu and Qingshan Liu
    and Dimitris N. Metaxas},
  title = {Parallel Sparse Subspace Clustering via Joint Sample
    and Parameter Blockwise Partition},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {75:1--75:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/3063316},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Sparse subspace clustering (SSC) is a classical method
    to cluster data with specific subspace structure for
    each group. It has many desirable theoretical
    properties and has been shown to be effective in
    various applications. However, under the condition of a
    large-scale dataset, learning the sparse sample
    affinity graph is computationally expensive. To tackle
    the computation time cost challenge, we develop a
    memory-efficient parallel framework for computing SSC
    via an alternating direction method of multiplier
    (ADMM) algorithm. The proposed framework partitions the
    data matrix into column blocks and then decomposes the
    original problem into parallel multivariate Lasso
    regression subproblems and samplewise operations. The
    proposed method allows us to allocate multiple
    cores/machines for the processing of individual column
    blocks. We propose a stochastic optimization algorithm
    to minimize the objective function. Experimental
    results on real-world datasets demonstrate that the
    proposed blockwise ADMM framework is substantially more
    efficient than its matrix counterpart used by SSC,
    without sacrificing performance in applications.
    Moreover, our approach is directly applicable to
    parallel neighborhood selection for Gaussian graphical
    models structure estimation.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {75},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

@article{Kung:2017:CPD,
  author = {Sun-Yuan Kung and Thee Chanyaswad and J. Morris Chang
    and Peiyuan Wu},
  title = {Collaborative {PCA\slash DCA} Learning Methods for
    Compressive Privacy},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {76:1--76:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/2996460},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In the Internet era, the data being collected on
    consumers like us are growing exponentially, and
    attacks on our privacy are becoming a real threat. To
    better ensure our privacy, it is safer to let the data
    owner control the data to be uploaded to the network as
    opposed to taking chance with data servers or third
    parties. To this end, we propose compressive privacy, a
    privacy-preserving technique to enable the data creator
    to compress data via collaborative learning so that the
    compressed data uploaded onto the Internet will be
    useful only for the intended utility and not be easily
    diverted to malicious applications. For data in a
    high-dimensional feature vector space, a common
    approach to data compression is dimension reduction or,
    equivalently, subspace projection. The most prominent
    tool is principal component analysis (PCA). For
    unsupervised learning, PCA can best recover the
    original data given a specific reduced dimensionality.
    However, for the supervised learning environment, it is
    more effective to adopt a supervised PCA, known as
    discriminant component analysis (DCA), to maximize the
    discriminant capability. The DCA subspace analysis
    embraces two different subspaces. The signal-subspace
    components of DCA are associated with the discriminant
    distance/power (related to the classification
    effectiveness), whereas the noise subspace components
    of DCA are tightly coupled with recoverability and/or
    privacy protection. This article presents three
    DCA-related data compression methods useful for
    privacy-preserving applications: --- Utility-driven
    DCA: Because the rank of the signal subspace is limited
    by the number of classes, DCA can effectively support
    classification using a relatively small dimensionality
    (i.e., high compression). --- Desensitized PCA: By
    incorporating a signal-subspace ridge into DCA, it
    leads to a variant especially effective for extracting
    privacy-preserving components. In this case, the
    eigenvalues of the noise-space are made to become
    insensitive to the privacy labels and are ordered
    according to their corresponding component powers. ---
    Desensitized K-means/SOM: Since the revelation of the
    K-means or SOM cluster structure could leak sensitive
    information, it is safer to perform K-means or SOM
    clustering on a desensitized PCA subspace.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {76},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

@article{Fleming:2017:CDI,
  author = {Tom Fleming and Huang-Ming Huang and Alan Burns and
    Chris Gill and Sanjoy Baruah and Chenyang Lu},
  title = {Corrections to and Discussion of {``Implementation and
    Evaluation of Mixed-criticality Scheduling Approaches
    for Sporadic Tasks''}},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {77:1--77:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/2974020},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  note = {See \cite{Huang:2014:IEM}.},
  abstract = {The AMC-IA mixed-criticality scheduling analysis was
    proposed as an improvement to the AMC-MAX adaptive
    mixed-criticality scheduling analysis. However, we have
    identified several necessary corrections to the AMC-IA
    analysis. In this article, we motivate and describe
    those corrections, and discuss and illustrate why the
    corrected AMC-IA analysis cannot be shown to outperform
    AMC-MAX.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {77},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

%%% The last author's family name is the compound ``Ben Hadj-Alouane''.
%%% It is braced so that BibTeX's First-Last name parsing keeps it as a
%%% single surname instead of treating ``Ben'' as a second given name.
@Article{Bouraoui:2017:HAE,
  author =       "Hasna Bouraoui and Chadlia Jerad and Anupam
                 Chattopadhyay and Nejib {Ben Hadj-Alouane}",
  title =        "Hardware Architectures for Embedded Speaker
                 Recognition Applications: a Survey",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "78:1--78:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2975161",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Authentication technologies based on biometrics, such
                 as speaker recognition, are attracting more and more
                 interest thanks to the elevated level of security
                 offered by these technologies. Despite offering many
                 advantages, such as remote use and low vulnerability,
                 speaker recognition applications are constrained by the
                 heavy computational effort and the hard real-time
                 constraints. When such applications are run on an
                 embedded platform, the problem becomes more
                 challenging, as additional constraints inherent to this
                 specific domain are added. In the literature, different
                 hardware architectures were used/designed for
                 implementing a process with a focus on a given
                 particular metric. In this article, we give a survey of
                 the state-of-the-art works on implementations of
                 embedded speaker recognition applications. Our aim is
                 to provide an overview of the different approaches
                 dealing with acceleration techniques oriented towards
                 speaker and speech recognition applications and attempt
                 to identify the past, current, and future research
                 trends in the area. Indeed, on the one hand, many
                 flexible solutions were implemented, using either
                 General Purpose Processors or Digital Signal
                 Processors. In general, these types of solutions suffer
                 from low area and energy efficiency. On the other hand,
                 high-performance solutions were implemented on
                 Application Specific Integrated Circuits or Field
                 Programmable Gate Arrays but at the expense of
                 flexibility. Based on the available results, we compare
                 the application requirements vis-{\`a}-vis the
                 performance achieved by the systems. This leads to the
                 projection of new research trends that can be
                 undertaken in the future.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Xu:2017:AFA,
  author = {Ye Xu and Israel Koren and C. Mani Krishna},
  title = {{AdaFT}: a Framework for Adaptive Fault Tolerance for
    Cyber-Physical Systems},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {79:1--79:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/2980763},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Cyber-physical systems (CPS) frequently have to use
    massive redundancy to meet application requirements for
    high reliability. While such redundancy is required, it
    can be activated adaptively, based on the current state
    of the controlled plant. Most of the time, the plant is
    in a state that allows for a lower level of fault
    tolerance. Avoiding the continuous deployment of
    massive fault tolerance will greatly reduce the
    workload of the CPS, and lower the operating
    temperature of the cyber sub-system, thus increasing
    its reliability. In this article, we extend our prior
    research by demonstrating a software simulation
    framework Adaptive Fault Tolerance (AdaFT) that can
    automatically generate the sub-spaces within which our
    adaptive fault tolerance can be applied. We also show
    the theoretical benefits of AdaFT and its actual
    implementation in several real-world CPSs.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {79},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

%%% The first author's family name is the compound ``Jahier Pagliari''.
%%% It is braced so that BibTeX's First-Last name parsing keeps it as a
%%% single surname instead of treating ``Jahier'' as a second given name.
%%% The citation key is left unchanged so existing citations still resolve.
@Article{Pagliari:2017:ABC,
  author =       "Daniele {Jahier Pagliari} and Mario R. Casu and Luca
                 P. Carloni",
  title =        "Accelerators for Breast Cancer Detection",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "80:1--80:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2983630",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Algorithms used in microwave imaging for breast cancer
                 detection require hardware acceleration to speed up
                 execution time and reduce power consumption. In this
                 article, we present the hardware implementation of two
                 accelerators for two alternative imaging algorithms
                 that we obtain entirely from SystemC specifications via
                 high-level synthesis. The two algorithms present
                 opposite characteristics that stress the design process
                 and the capabilities of commercial HLS tools in
                 different ways: the first is communication bound and
                 requires overlapping and pipelining of communication
                 and computation in order to maximize the application
                 throughput; the second is computation bound and uses
                 complex mathematical functions that HLS tools do not
                 directly support. Despite these difficulties, thanks to
                 HLS, in the span of only 4 months we were able to
                 explore a large design space and derive about 100
                 implementations with different cost-performance
                 profiles, targeting both a Field-Programmable Gate
                 Array (FPGA) platform and a 32-nm standard-cell
                 Application Specific Integrated Circuit (ASIC) library.
                 In addition, we could obtain results that outperform a
                 previous Register-Transfer Level (RTL) implementation,
                 which confirms the remarkable progress of HLS tools.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "80",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Chen:2017:SBT,
  author = {Jiunn-Yeu Chen and Wuu Yang and Wei-Chung Hsu and
    Bor-Yeh Shen and Quan-Huei Ou},
  title = {On Static Binary Translation of {ARM\slash Thumb}
    Mixed {ISA} Binaries},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {81:1--81:??},
  month = jul,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/2996458},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Code discovery has been a main challenge for static
    binary translation, especially when the source
    instruction set architecture has variable-length
    instructions, such as the x86 architectures. Due to
    embedded data such as PC (program counter)-relative
    data, jump tables, or paddings in the code section, a
    binary translator may be misled to translate data as
    instructions. For variable-length instructions, once a
    piece of data is mis-translated as instructions,
    decoding subsequent bytes could also go wrong. We are
    concerned with static binary translation for the very
    popular Advanced RISC Machine (ARM) architectures.
    Although ARM is considered a reduced instruction set
    computer architecture, it does allow the mix of 32-bit
    (ARM) instructions and 16-bit (Thumb) instructions in
    the same executables. In addition to different
    instruction lengths, the ARM and Thumb instructions are
    located at 4-byte or 2-byte aligned addresses,
    respectively. Furthermore, because ARM and Thumb
    instructions share the same encoding space, a 4-byte
    word could sometimes be decoded as one ARM instruction
    or two Thumb instructions. The correct decoding of this
    4-byte word is actually determined at runtime by the
    least-significant bit of the program counter. For
    unstripped binaries, the mapping symbols can be used to
    identify ARM code regions and Thumb code regions.
    However, for stripped binaries, such mapping symbols
    are unavailable. We propose a novel solution to
    statically translate stripped ARM/Thumb mixed
    executables. Our solution is implemented in a static
    binary translator. The binary translator further
    generates multiple versions of translated code for the
    code regions whose types cannot be determined with our
    solution. One of the code versions is selected during
    runtime. The binary translator also includes a series
    of analyses that enable the removal of most useless
    code versions. Based on the experimental results on
    stripped ARM/Thumb mixed binaries in the SPEC2006 and
    Embedded Microprocessor Benchmark Consortium (EEMBC)
    benchmark suites, our static binary translator achieves
    impressive performance when migrating them to run on
    x86 machines and the space overhead is no more than
    10\%.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {81},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
}

@Article{Tan:2017:ITM,
  author = {Wilson M. Tan and Paul Sullivan and Hamish Watson and
    Joanna Slota-Newson and Stephen A. Jarvis},
  title = {An Indoor Test Methodology for Solar-Powered Wireless
    Sensor Networks},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {82:1--82:??},
  month = jul,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2994604},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Repeatable and accurate tests are important when
    designing hardware and algorithms for solar-powered
    wireless sensor networks (WSNs). Since no two days are
    exactly alike with regard to energy harvesting, tests
    must be carried out indoors. Solar simulators are
    traditionally used in replicating the effects of
    sunlight indoors; however, solar simulators are
    expensive, have lighting elements that have short
    lifetimes, and are usually not designed to carry out
    the types of tests that hardware and algorithm
    designers require. As a result, hardware and algorithm
    designers use tests that are inaccurate and not
    repeatable (both for others and also for the designers
    themselves). In this article, we propose an indoor test
    methodology that does not rely on solar simulators. The
    test methodology has its basis in astronomy and
    photovoltaic cell design. We present a generic design
    for a test apparatus that can be used in carrying out
    the test methodology. We also present a specific design
    that we use in implementing an actual test apparatus.
    We test the efficacy of our test apparatus and, to
    demonstrate the usefulness of the test methodology,
    perform experiments akin to those required in projects
    involving solar-powered WSNs. Results of the said tests
    and experiments demonstrate that the test methodology
    is an invaluable tool for hardware and algorithm
    designers working with solar-powered WSNs.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {82},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Chen:2017:SUE,
  author = {Tseng-Yi Chen and Yuan-Hao Chang and Shuo-Han Chen and
    Nien-I Hsu and Hsin-Wen Wei and Wei-Kuan Shih},
  title = {On Space Utilization Enhancement of File Systems for
    Embedded Storage Systems},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {83:1--83:??},
  month = jul,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2820488},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Since the mid-2000s, mobile/embedded computing systems
    conventionally have limited computing power, Random
    Access Memory (RAM) space, and storage capacity due to
    the consideration of their cost, energy consumption,
    and physical size. Recently, some of these systems,
    such as mobile phone and embedded consumer electronics,
    have more powerful computing capability, so they manage
    their data in small flash storage devices (e.g.,
    Embedded Multi Media Card (eMMC) and Secure Digital
    (SD) cards) with a simple file system. However, the
    existing file systems usually have low space
    utilization for managing small files and the tail data
    of large files. In this work, we thus propose a dynamic
    tail packing scheme to enhance the space utilization of
    file systems over flash storage devices in embedded
    computing systems by dynamically aggregating/packing
    the tail data of (small) files together. To evaluate
    the benefits and overheads of the proposed scheme, we
    theoretically formulate analysis equations for
    obtaining the best settings in the dynamic tail packing
    scheme. Additionally, the proposed scheme was
    implemented in the file system of Linux operating
    systems to evaluate its capability. The results
    demonstrate that the proposed scheme could
    significantly improve the space utilization of existing
    file systems.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {83},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Landy:2017:SAS,
  author = {Aaron Landy and Greg Stitt},
  title = {Serial Arithmetic Strategies for Improving {FPGA}
    Throughput},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {84:1--84:??},
  month = jul,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2996459},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Serial arithmetic has been shown to offer attractive
    advantages in area for field-programmable gate array
    (FPGA) datapaths but suffers from a significant
    reduction in throughput compared to traditional
    bit-parallel designs. In this work, we perform a
    performance and trade-off analysis that
    counterintuitively shows that, despite the decreased
    throughput of individual serial operators, replication
    of serial arithmetic can provide a 2.1 $ \times $
    average increase in throughput compared to bit-parallel
    pipelines for common FPGA applications. We complement
    this analysis with a novel SerDes architecture that
    enables existing FPGA pipelines to be replaced with
    serial logic with potentially higher throughput. We
    also present a serialized sliding-window architecture
    that improves average throughput 2.4 $ \times $
    compared to existing bit-parallel work.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {84},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Alur:2017:SBR,
  author =       "Rajeev Alur and Vojt{\v{e}}ch Forejt and Salar
                 Moarref and Ashutosh Trivedi",
  title =        "Schedulability of Bounded-Rate Multimode Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "85:1--85:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2996797",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Bounded-rate multimode systems are hybrid systems that
                 switch freely among a finite set of modes, and whose
                 dynamics are specified by a finite number of
                 real-valued variables with mode-dependent rates that
                 vary within given bounded sets. The scheduler
                 repeatedly proposes a time and a mode, while the
                 environment chooses an allowable rate for that mode;
                 the state of the system changes linearly in the
                 direction of the rate. The scheduler aims to keep the
                 state within a safe set, while the environment aims to
                 leave it. We study the problem of existence of a
                 winning scheduler strategy and associated complexity
                 questions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "85",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bandari:2017:DBE,
  author = {Maryam Bandari and Robert Simon and Hakan Aydin},
  title = {{DMS}-Based Energy Optimizations for Clustered
    {WSNs}},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {86:1--86:??},
  month = jul,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2998179},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In this article, we consider clustered wireless sensor
    networks where the nodes harvest energy from the
    environment. We target performance-sensitive
    applications that have to collectively send their
    information to a cluster head by a predefined deadline.
    The nodes are equipped with Dynamic Modulation Scaling
    (DMS)-capable wireless radios. DMS provides a tuning
    knob, allowing us to trade off communication latency
    with energy consumption. We consider two optimization
    objectives, maximizing total energy reserves and
    maximizing the minimum energy level across all nodes.
    For both objectives, we show that optimal solutions can
    be obtained by solving Mixed Integer Linear Programming
    problems. We also develop several fast heuristics that
    are shown to provide approximate solutions
    experimentally.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {86},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Devaraj:2017:FTP,
  author = {Rajesh Devaraj and Arnab Sarkar and Santosh Biswas},
  title = {Fault-Tolerant Preemptive Aperiodic {RT} Scheduling by
    Supervisory Control of {TDES} on Multiprocessors},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {87:1--87:??},
  month = jul,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3012278},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Safety-critical real-time systems must meet stringent
    timing and fault-tolerance requirements. This article
    proposes a methodology for synthesizing an optimal
    preemptive multiprocessor aperiodic task scheduler
    using a formal supervisory control framework. The
    scheduler can tolerate single/multiple permanent
    processor faults. Further, the synthesis framework has
    been empowered with a novel BDD-based symbolic
    computation mechanism to control the exponential
    state-space complexity of the optimal exhaustive
    enumeration-oriented synthesis methodology.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {87},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Lu:2017:CDS,
  author = {Qining Lu and Guanpeng Li and Karthik Pattabiraman and
    Meeta S. Gupta and Jude A. Rivers},
  title = {Configurable Detection of {SDC}-causing Errors in
    Programs},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {88:1--88:??},
  month = jul,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3014586},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Silent Data Corruption (SDC) is a serious reliability
    issue in many domains, including embedded systems.
    However, current protection techniques are brittle and
    do not allow programmers to trade off performance for
    SDC coverage. Further, many require tens of thousands
    of fault-injection experiments, which are highly time-
    and resource-intensive. In this article, we propose two
    empirical models, SDCTune and SDCAuto, to predict the
    SDC proneness of a program's data. Both models are
    based on static and dynamic features of the program
    alone and do not require fault injections to be
    performed. The main difference between them is that
    SDCTune requires manual tuning while SDCAuto is
    completely automated, using machine-learning
    algorithms. We then develop an algorithm using both
    models to selectively protect the most SDC-prone data
    in the program subject to a given performance overhead
    bound. Our results show that both models are accurate
    at predicting the relative SDC rate of an application
    compared to fault injection, for a fraction of the time
    taken. Further, in terms of efficiency of detection
    (i.e., ratio of SDC coverage provided to performance
    overhead), our technique outperforms full duplication
    by a factor of 0.78x to 1.65x with the SDCTune model
    and 0.62x to 0.96x with SDCAuto model.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {88},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Huang:2017:FBA,
  author = {Guoxian Huang and Lei Wang},
  title = {An {FPGA}-Based Architecture for High-Speed Compressed
    Signal Reconstruction},
  journal = j-TECS,
  volume = {16},
  number = {3},
  pages = {89:1--89:??},
  month = jul,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3056481},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jul 24 09:51:12 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Compressive Sensing (CS) is an emerging research area
    that allows efficient signal acquisition under the
    sub-Nyquist rate while still promising reliable data
    recovery. However, practical applications of CS in
    hardware platforms are limited as signal reconstruction
    is still challenging due to its high computational
    complexity, especially for autonomous real-time signal
    recovery. In this article, we propose an algorithmic
    transformation technique referred to as Matrix
    Inversion Bypass (MIB) to improve the signal recovery
    efficiency of the Orthogonal Matching Pursuit
    (OMP)-based CS reconstruction. The basic idea of MIB is
    to decouple the computations of intermediate signal
    estimates and matrix inversions, thereby enabling
    parallel processing of these two time-consuming
    operations in the OMP algorithm. The proposed MIB
    naturally leads to a parallel architecture for
    high-speed dedicated hardware implementations. An
    FPGA-based implementation is developed with the
    optimized structure aimed at the efficient utilization
    of hardware resources while realizing high-speed signal
    recovery. The proposed architecture can perform the
    signal recovery at up to 1.4 $ \times $ faster than the
    OMP-based implementation using almost the same hardware
    resources.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {89},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Groza:2017:LCL,
  author =       "Bogdan Groza and Stefan Murvay and Anthony {Van
                 Herrewege} and Ingrid Verbauwhede",
  title =        "{LiBrA-CAN}: Lightweight Broadcast Authentication for
                 Controller Area Networks",
  journal =      j-TECS,
  volume =       "16",
  number =       "3",
  pages =        "90:1--90:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3056506",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 24 09:51:12 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Despite realistic concerns, security is still absent
                 from vehicular buses such as the widely used Controller
                 Area Network (CAN). We design an efficient protocol
                 based on efficient symmetric primitives, taking
                 advantage of two innovative procedures: splitting keys
                 between nodes and mixing authentication tags. This
                 results in a higher security level when compromised
                 nodes are in the minority, a realistic assumption for
                 automotive networks. Experiments are performed on
                 state-of-the-art Infineon TriCore controllers,
                 contrasted with low-end Freescale S12X cores, while
                 simulations are provided for the recently released
                 CAN-FD standard. To gain compatibility with existent
                 networks, we also discuss a solution based on CAN+.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "90",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2017:ESM,
  author = {Sandeep K. Shukla},
  title = {Editorial: Security of Mobile Devices},
  journal = j-TECS,
  volume = {16},
  number = {4},
  pages = {91:1--91:??},
  month = sep,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3129534},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Sat Dec 9 08:24:13 MST 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {91},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Mathew:2017:GES,
  author = {Jimson Mathew and Rajat Subhra Chakraborty and Dhiraj
    K. Pradhan},
  title = {Guest Editorial: Special Issue on {``Secure and
    Fault-Tolerant Embedded Computing''}},
  journal = j-TECS,
  volume = {16},
  number = {4},
  pages = {92:1--92:??},
  month = aug,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3075563},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Aug 14 18:53:33 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {92},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Ko:2017:PCS,
  author = {Yohan Ko and Reiley Jeyapaul and Youngbin Kim and
    Kyoungwoo Lee and Aviral Shrivastava},
  title = {Protecting Caches from Soft Errors: a Microarchitect's
    Perspective},
  journal = j-TECS,
  volume = {16},
  number = {4},
  pages = {93:1--93:??},
  month = aug,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3063180},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Aug 14 18:53:33 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Soft error is one of the most important design
    concerns in modern embedded systems with aggressive
    technology scaling. Among various microarchitectural
    components in a processor, cache is the most
    susceptible component to soft errors. Error detection
    and correction codes are common protection techniques
    for cache memory due to their design simplicity. In
    order to design effective protection techniques for
    caches, it is important to quantitatively estimate the
    susceptibility of caches without and even with
    protections. At the architectural level, vulnerability
    is the metric to quantify the susceptibility of data in
    caches. However, existing tools and techniques
    calculate the vulnerability of data in caches through
    coarse-grained block-level estimation. Further, they
    ignore common cache protection techniques such as error
    detection and correction codes. In this article, we
    demonstrate that our word-level vulnerability
    estimation is accurate through intensive fault
    injection campaigns as compared to block-level one.
    Further, our extensive experiments over benchmark
    suites reveal several counter-intuitive and interesting
    results. Parity checking when performed over just reads
    provides reliable and power-efficient protection than
    that when performed over both reads and writes. On the
    other hand, checking error correcting codes only at
    reads alone can be vulnerable even for single-bit soft
    errors, while that at both reads and writes provides
    the perfect reliability.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {93},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Esposito:2017:NMO,
  author = {Stefano Esposito and Massimo Violante and Marco Sozzi
    and Marco Terrone and Massimo Traversone},
  title = {A Novel Method for Online Detection of Faults
    Affecting Execution-Time in Multicore-Based Systems},
  journal = j-TECS,
  volume = {16},
  number = {4},
  pages = {94:1--94:??},
  month = aug,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3063313},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Aug 14 18:53:33 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article proposes a bounded interference method,
    based on statistical evaluations, for online detection
    and tolerance of any fault capable of causing a
    deadline miss. The proposed method requires data that
    can be gathered during the profiling and worst-case
    execution time (WCET) analysis phase. This article
    describes the method, its application, and then it
    presents an avionic mixed-criticality use case for
    experimental evaluation, considering both dual-core and
    quad-core platforms. Results show that faults that can
    cause a timing violation are correctly identified while
    other faults that do not introduce a significant
    temporal interference can be tolerated to avoid high
    recovery overheads.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {94},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Yuce:2017:AFI,
  author = {Bilgiday Yuce and Nahid Farhady Ghalaty and Chinmay
    Deshpande and Harika Santapuri and Conor Patrick and
    Leyla Nazhandali and Patrick Schaumont},
  title = {Analyzing the Fault Injection Sensitivity of Secure
    Embedded Software},
  journal = j-TECS,
  volume = {16},
  number = {4},
  pages = {95:1--95:??},
  month = aug,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3063311},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Aug 14 18:53:33 MDT 2017},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Fault attacks on cryptographic software use faulty
    ciphertext to reverse engineer the secret encryption
    key. Although modern fault analysis algorithms are
    quite efficient, their practical implementation is
    complicated because of the uncertainty that comes with
    the fault injection process. First, the intended fault
    effect may not match the actual fault obtained after
    fault injection. Second, the logic target of the fault
    attack, the cryptographic software, is above the
    abstraction level of physical faults. The resulting
    uncertainty with respect to the fault effects in the
    software may degrade the efficiency of the fault
    attack, resulting in many more trial fault injections
    than the amount predicted by the theoretical fault
    attack. In this contribution, we highlight the
    important role played by the processor
    microarchitecture in the development of a fault attack.
    We introduce the microprocessor fault sensitivity model
    to systematically capture the fault response of a
    microprocessor pipeline. We also propose
    Microarchitecture-Aware Fault Injection Attack (MAFIA).
    MAFIA uses the fault sensitivity model to guide the
    fault injection and to predict the fault response. We
    describe two applications for MAFIA. First, we
    demonstrate a biased fault attack on an unprotected
    Advanced Encryption Standard (AES) software program
    executing on a seven-stage pipelined Reduced
    Instruction Set Computer (RISC) processor. The use of
    the microprocessor fault sensitivity model to guide the
    attack leads to an order of magnitude fewer fault
    injections compared to a traditional, blind fault
    injection method. Second, MAFIA can be used to break
    known software countermeasures against fault injection.
    We demonstrate this by systematically breaking a
    collection of state-of-the-art software fault
    countermeasures. These two examples lead to the key
    conclusion of this work, namely that software fault
    attacks become much more harmful and effective when an
    appropriate microprocessor fault sensitivity model is
    used. This, in turn, highlights the need for better
    fault countermeasures for software.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {95},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Mera:2017:ATP,
  author = {Maria Isabel Mera and Jonah Caplan and Seyyed Hasan
    Mozafari and Brett H. Meyer and Peter Milder},
  title = {Area, Throughput, and Power Trade-Offs for {FPGA}- and
    {ASIC}-Based Execution Stream Compression},
  journal = j-TECS,
  volume = {16},
  number = {4},
  pages = {96:1--96:??},
  month = aug,
  year = {2017},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3063312},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Aug 14 18:53:33 MDT 2017},
  bibsource = {http://portal.acm.org/;
    https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {An emerging trend in safety-critical computer system
    design is the use of compression --- for example, using
    cyclic redundancy check (CRC) or Fletcher checksum (FC)
    --- to reduce the state that must be compared to verify
    correct redundant execution. We examine the costs and
    performance of CRC and FC as compression algorithms
    when implemented in hardware for embedded
    safety-critical systems. To do so, we have developed
    parameterizable hardware-generation tools targeting CRC
    and two novel FC implementations. We evaluate the
    resulting designs implemented for FPGA and ASIC and
    analyze their efficiency. While CRC is often best, FC
    dominates when high throughput is needed.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {96},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Tigori:2017:FMB,
  author =       "Kabland Toussaint Gautier Tigori and Jean-Luc
                 B{\'e}chennec and S{\'e}bastien Faucou and Olivier
                 Henri Roux",
  title =        "Formal Model-Based Synthesis of Application-Specific
                 Static {RTOS}",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "97:1--97:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3015777",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In an embedded system, the specialization of the code
                 of the real-time operating system (RTOS) according to
                 the requirements of the application allows one to
                 remove unused services and other sources of dead code
                 from the binary program. The typical specialization
                 process is based on a mix of precompiler macros and
                 build scripts, both of which are known for being
                 sources of errors. In this article, we present a new
                 model-based approach to the design of
                 application-specific RTOS. Starting with finite state
                 models describing the RTOS and the application
                 requirements, the set of blocks in the RTOS code
                 actually used by the application is automatically
                 computed. This set is used to build an
                 application-specific RTOS model. This model is fed into
                 a code generator to produce the source code of an
                 application-specific RTOS. It is also used to carry on
                 model-based validations and verifications, including
                 the formal verification that the specialization process
                 did not introduce unwanted behaviors or suppress
                 expected ones. To demonstrate the feasibility of this
                 approach, it is applied to specialize Trampoline, an
                 open-source implementation of the AUTOSAR OS standard,
                 to an industrial case study from the automotive
                 domain.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "97",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Santanna:2017:DIS,
  author =       "Francisco Sant'anna and Roberto Ierusalimschy and
                 Noemi Rodriguez and Silvana Rossetto and Adriano
                 Branco",
  title =        "The Design and Implementation of the Synchronous
                 Language {C{\'e}U}",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "98:1--98:26",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3035544",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "C{\'e}U is a synchronous language targeting soft
                 real-time systems. It is inspired by Esterel and has a
                 simple semantics with fine-grain control over program
                 execution. C{\'e}U uses an event-triggered notion of
                 time that enables compile-time checks to detect
                 conflicting concurrent statements, resulting in
                 deterministic and concurrency-safe programs. We present
                 the particularities of our design in comparison to
                 Esterel, such as stack-based internal events,
                 concurrency checks, safe integration with C, and
                 first-class timers. We also present two implementation
                 back ends: one aiming for resource efficiency and
                 interoperability with C, and another as a virtual
                 machine that allows remote reprogramming.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "98",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Stilkerich:2017:PGU,
  author =       "Isabella Stilkerich and Clemens Lang and Christoph
                 Erhardt and Christian Bay and Michael Stilkerich",
  title =        "The Perfect Getaway: Using Escape Analysis in Embedded
                 Real-Time Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "99:1--99:30",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3035542",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The use of a managed, type-safe language such as Java
                 in real-time and embedded systems offers productivity
                 and, in particular, safety and dependability benefits
                 at a reasonable cost. It has been shown for commodity
                 systems that Escape Analysis (EA) enables a set of
                 useful optimizations, and benefits from the properties
                 of a type-safe language. In this article, we explore
                 the application of escape analysis in KESO [Stilkerich
                 et al. 2012], a Java ahead-of-time compiler targeting
                 embedded real-time systems. We present specific
                 applications of EA for embedded programs that go beyond
                 the widely known stack-allocation and synchronization
                 optimizations such as extended remote-procedure-call
                 (RPC) support for software-isolated applications,
                 automated inference of immutable data, or improved
                 upper space and time bounds for worst-case
                 estimations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "99",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hassan:2017:PRA,
  author =       "Mohamed Hassan and Hiren Patel and Rodolfo
                 Pellizzoni",
  title =        "{PMC}: a Requirement-Aware {DRAM} Controller for
                 Multicore Mixed Criticality Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "100:1--100:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3019611",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We propose a novel approach to schedule memory
                 requests in Mixed Criticality Systems (MCS). This
                 approach supports an arbitrary number of criticality
                 levels by enabling the MCS designer to specify memory
                 requirements per task. It retains locality within
                 large-size requests to satisfy memory requirements of
                 all tasks. To achieve this target, we introduce a
                 compact time-division-multiplexing scheduler, and a
                 framework that constructs optimal schedules to manage
                 requests to off-chip memory. We also present a static
                 analysis that guarantees meeting requirements of all
                 tasks. We compare the proposed controller against
                 state-of-the-art memory controllers using both a case
                 study and synthetic experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "100",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2017:HAT,
  author =       "Tianyi Wang and Soamar Homsi and Linwei Niu and
                 Shaolei Ren and Ou Bai and Gang Quan and Meikang Qiu",
  title =        "Harmonicity-Aware Task Partitioning for Fixed Priority
                 Scheduling of Probabilistic Real-Time Tasks on
                 Multi-Core Platforms",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "101:1--101:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3064813",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The uncertainty due to performance variations of IC
                 chips and resource sharing on multi-core platforms have
                 significantly degraded the predictability of real-time
                 systems. Traditional deterministic approaches based on
                 the worst-case assumptions become extremely pessimistic
                 and thus unpractical. In this article, we address the
                 problem of scheduling a set of fixed-priority periodic
                 real-time tasks on multi-core platforms in a
                 probabilistic manner. Specifically, we consider task
                 execution time as a probabilistic distribution and
                 study how to schedule these tasks on multi-core
                 platforms with guaranteed Quality of Service (QoS)
                 requirements in terms of deadline-missing
                 probabilities. Moreover, it is a well-known fact that
                 the relationship among task periods, if exploited
                 appropriately, can significantly improve the processor
                 utilization. To this end, we present a novel approach
                 to partition real-time tasks that can take both task
                 execution time distributions and their period
                 relationships into consideration. From our extensive
                 experiment results, our proposed methods can greatly
                 improve the schedulability of real-time tasks when
                 compared with existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "101",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2017:DRM,
  author =       "Yi Wang and Yajun Ha",
  title =        "A {DFA}-Resistant and Masked {PRESENT} with Area
                 Optimization for {RFID} Applications",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "102:1--102:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3035543",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Radio-Frequency Identification (RFID) tag-based
                 applications are usually resource constrained and
                 security sensitive. However, only about 2,000 gate
                 equivalents in a tag can be budgeted for implementing
                 security components [27]. This requires not only
                 lightweight cryptographic algorithms such as PRESENT
                 (around 1,000 gate equivalents) but also lightweight
                 protections against modern Side Channel Attacks (SCAs).
                 With this budget, the first-order masking and fault
                 detection are two suitable countermeasures to be
                 developed for PRESENT. However, if both countermeasures
                 are applied without any optimization, it will
                 significantly exceed the given area budget. In this
                 work, we optimize area to include both countermeasures
                 to maximize the security for PRESENT within this RFID
                 area budget. The most area-consuming parts of the
                 proposed design are the masked S-boxes and the inverse
                 masked S-boxes. To optimize the area, we have deduced a
                 computational relationship between these two parts,
                 which enables us to reuse the hardware resource of the
                 masked S-boxes to implement the inverse masked S-boxes.
                 The proposed design takes up only 2,376 gates with UMC
                 65nm CMOS technology. Compared with the unoptimized
                 design, our implementation reduces the overall area by
                 28.45\%. We have tested the effectiveness of the
                 first-order Differential Power Analysis (DPA) and
                 Differential Fault Analysis (DFA)-resistant
                 countermeasures. Experimental results show that we have
                 enhanced the SCA resistance of our PRESENT
                 implementation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "102",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nagar:2017:RCB,
  author =       "Kartik Nagar and Y. N. Srikant",
  title =        "Refining Cache Behavior Prediction Using Cache Miss
                 Paths",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "103:1--103:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3035541",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Worst-Case Execution Time (WCET) is an important
                 metric for programs running on real-time systems, and
                 finding precise estimates of a program's WCET is
                 crucial to avoid wastage of hardware resources and to
                 improve the schedulability of task sets. Caches have a
                 major impact on a program's execution time, and
                 accurate estimation of a program's cache behavior can
                 lead to significant reduction in its estimated WCET.
                 The traditional approach to cache analysis generally
                 targets the worst-case cache behavior of individual
                 cache accesses and provides a safe hit-miss
                 classification for every individual access. In this
                 work, we show that these classifications are not
                 sufficient to precisely capture cache behavior, since
                 they apply to individual accesses, and often, more
                 precise predictions can be made about groups of
                 accesses. Further, memory accesses inside loops may
                 show the worst-case behavior only for a subset of the
                 iteration space. In order to predict such behavior in a
                 scalable fashion, we use the fact that the cache
                 behavior of an access mostly depends only on the memory
                 accesses made in the immediate vicinity, and hence we
                 analyze a small, fixed-size neighborhood of every
                 access with complete precision and summarize the
                 resulting information in the form of cache miss paths.
                 A variety of analyses are then performed on the cache
                 miss paths to make precise predictions about cache
                 behavior. We also demonstrate precision issues in
                 Abstract Interpretation-based Must and Persistence
                 cache analysis that can be easily solved using cache
                 miss paths. Experimental results over a wide range of
                 benchmarks demonstrate precision improvement in WCET of
                 multipath programs over previous approaches, and we
                 also show how to integrate our approach with other
                 microarchitectural analysis such as pipeline
                 analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "103",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Benerecetti:2017:ASS,
  author =       "Massimo Benerecetti and Marco Faella",
  title =        "Automatic Synthesis of Switching Controllers for
                 Linear Hybrid Systems: Reachability Control",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "104:1--104:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047500",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We consider the problem of computing the controllable
                 region of a Linear Hybrid Automaton with controllable
                 and uncontrollable transitions, w.r.t. a reachability
                 objective. We provide an algorithm for the
                 finite-horizon version of the problem, based on
                 computing the set of states that must reach a given
                 non-convex polyhedron while avoiding another one,
                 subject to a polyhedral constraint on the slope of the
                 trajectory. Experimental results are presented, based
                 on an implementation of the proposed algorithm on top
                 of the tool SpaceEx.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "104",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sandoval:2017:TTS,
  author =       "Nathan Sandoval and Casey Mackin and Sean Whitsitt and
                 Vijay Shankar Gopinath and Sachidanand Mahadevan and
                 Andrew Milakovich and Kyle Merry and Jonathan Sprinkle
                 and Roman Lysecky",
  title =        "Task Transition Scheduling for Data-Adaptable
                 Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "105:1--105:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047498",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Data-adaptable embedded systems operate on a variety
                 of data streams, which requires a large degree of
                 configurability and adaptability to support runtime
                 changes in data stream inputs. Data-adaptable
                 reconfigurable embedded systems, when decomposed into a
                 series of tasks, enable a flexible runtime
                 implementation in which a system can transition the
                 execution of certain tasks between hardware and
                 software while simultaneously continuing to process
                 data during the transition. Efficient runtime
                 scheduling of task transitions is needed to optimize
                 system throughput and latency of the reconfiguration
                 and transition periods. In this article, we provide an
                 overview of a runtime framework enabling the efficient
                 transition of tasks between software and hardware in
                 response to changes in system inputs. We further
                 present and analyze several runtime transition
                 scheduling algorithms and highlight the latency and
                 throughput tradeoffs for two data-adaptable systems. To
                 evaluate the task transition selection algorithms, a
                 case study was performed on an adaptable JPEG2000
                 implementation as well as three other synchronous
                 dataflow systems characterized by transition latency
                 and communication load.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "105",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zheng:2017:RTS,
  author =       "Xi Zheng and Christine Julien and Hongxu Chen and
                 Rodion Podorozhny and Franck Cassez",
  title =        "Real-Time Simulation Support for Runtime Verification
                 of Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "106:1--106:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3063382",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In Cyber-Physical Systems (CPS), cyber and physical
                 components must work seamlessly in tandem. Runtime
                 verification of CPS is essential yet very difficult,
                 due to deployment environments that are expensive,
                 dangerous, or simply impossible to use for verification
                 tasks. A key enabling factor of runtime verification of
                 CPS is the ability to integrate real-time simulations
                 of portions of the CPS into live running systems. We
                 propose a verification approach that allows CPS
                 application developers to opportunistically leverage
                 real-time simulation to support runtime verification.
                 Our approach, termed BraceBind, allows selecting, at
                 runtime, between actual physical processes or
                 simulations of them to support a running CPS
                 application. To build BraceBind, we create a real-time
                 simulation architecture to generate and manage multiple
                 real-time simulation environments based on existing
                 simulation models in a manner that ensures sufficient
                 accuracy for verifying a CPS application. Specifically,
                 BraceBind aims to both improve simulation speed and
                 minimize latency, thereby making it feasible to
                 integrate simulations of physical processes into the
                 running CPS application. BraceBind then integrates this
                 real-time simulation architecture with an existing
                 runtime verification approach that has low
                 computational overhead and high accuracy. This
                 integration uses an aspect-oriented adapter
                 architecture that connects the variables in the cyber
                 portion of the CPS application with either sensors and
                 actuators in the physical world or the automatically
                 generated real-time simulation. Our experimental
                 results show that, with a negligible performance
                 penalty, our approach is both efficient and effective
                 in detecting program errors that are otherwise only
                 detectable in a physical deployment.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "106",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ma:2017:DPE,
  author =       "Kaisheng Ma and Xueqing Li and Huichu Liu and Xiao
                 Sheng and Yiqun Wang and Karthik Swaminathan and
                 Yongpan Liu and Yuan Xie and John Sampson and
                 Vijaykrishnan Narayanan",
  title =        "Dynamic Power and Energy Management for Energy
                 Harvesting Nonvolatile Processor Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "107:1--107:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3077575",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Self-powered systems running on scavenged energy will
                 be a key enabler for pervasive computing across the
                 Internet of Things. The variability of input power in
                 energy-harvesting systems limits the effectiveness of
                 static optimizations aimed at maximizing the
                 input-energy-to-computation ratio. We show that the
                 resultant gap between available and exploitable energy
                 is significant, and that energy storage optimizations
                 alone do not significantly close the gap. We
                 characterize these effects on a real, fabricated
                 energy-harvesting system based on a nonvolatile
                 processor. We introduce a unified energy-oriented
                 approach to first optimize the number of backups, by
                 more aggressively using the stored energy available
                 when power failure occurs, and then optimize forward
                 progress via improving the rate of input energy to
                 computation via dynamic voltage and frequency scaling
                 and self-learning techniques. We evaluate combining
                 these schemes and show capture of up to 75.5\% of all
                 input energy toward processor computation, an average
                 of $ 1.54 \times $ increase over the best static
                 ``Forward Progress'' baseline system. Notably, our
                 energy-optimizing policy combinations simultaneously
                 improve both the rate of forward progress and the rate
                 of backup events (by up to 60.7\% and 79.2\% for RF
                 power, respectively, and up to 231.2\% and reduced to
                 zero, respectively, for solar power). This contrasts
                 with static frequency optimization approaches in which
                 these two metrics are antagonistic.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "107",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chatterjee:2017:FTD,
  author =       "Navonil Chatterjee and Suraj Paul and Santanu
                 Chattopadhyay",
  title =        "Fault-Tolerant Dynamic Task Mapping and Scheduling for
                 Network-on-Chip-Based Multicore Platform",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "108:1--108:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3055512",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In Network-on-Chip (NoC)-based multicore systems, task
                 allocation and scheduling are known to be important
                 problems, as they affect the performance of
                 applications in terms of energy consumption and timing.
                 Advancement of deep submicron technology has made it
                 possible to scale the transistor feature size to the
                 nanometer range, which has enabled multiple processing
                 elements to be integrated onto a single chip. On the
                 flipside, it has made the integrated entities on the
                 chip more susceptible to different faults. Although a
                 significant amount of work has been done in the domain
                 of fault-tolerant mapping and scheduling, existing
                 algorithms either precompute reconfigured mapping
                 solutions at design time while anticipating fault(s)
                 scenarios or adopt a hybrid approach wherein a part of
                 the fault mitigation strategy relies on the design-time
                 solution. The complexity of the problem rises further
                 for real-time dynamic systems where new applications
                 can arrive in the multicore platform at any time
                 instant. For real-time systems, the validity of
                 computation depends both on the correctness of results
                 and on temporal constraint satisfaction. This article
                 presents an improved fault-tolerant dynamic solution to
                 the integrated problem of application mapping and
                 scheduling for NoC-based multicore platforms. The
                 developed algorithm provides a unified mapping and
                 scheduling method for real-time systems focusing on
                 meeting application deadlines and minimizing
                 communication energy. A predictive model has been used
                 to determine the failure-prone cores in the system for
                 which a fault-tolerant resource allocation with task
                 redundancy has been performed. By selectively using a
                 task replication policy, the reliability of the
                 application, executing on a given NoC platform, is
                 improved. A detailed evaluation of the performance of
                 the proposed algorithm has been conducted for both real
                 and synthetic applications. When compared with other
                 fault-tolerant algorithms reported in the literature,
                 performance of the proposed algorithm shows an average
                 reduction of 56.95\% in task re-execution time overhead
                 and an average improvement of 31\% in communication
                 energy. Further, for time-constrained tasks, deadline
                 satisfaction has also been achieved for most of the
                 test cases by the developed algorithm, whereas the
                 techniques reported in the literature failed to meet
                 deadline in about 45\% test cases.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "108",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ahir:2017:LAR,
  author =       "Prashant Ahir and Mehran Mozaffari-Kermani and Reza
                 Azarderakhsh",
  title =        "Lightweight Architectures for Reliable and Fault
                 Detection {Simon} and {Speck} Cryptographic Algorithms
                 on {FPGA}",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "109:1--109:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3055514",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The widespread use of sensitive and constrained
                 applications necessitates lightweight (low-power and
                 low-area) algorithms developed for constrained
                 nano-devices. However, nearly all of such algorithms
                 are optimized for platform-based performance and may
                 not be useful for diverse and flexible applications.
                 The National Security Agency (NSA) has proposed two
                 relatively recent families of lightweight ciphers, that
                 is, Simon and Speck, designed as efficient ciphers on
                 both hardware and software platforms. This article
                 proposes concurrent error detection schemes to provide
                 reliable architectures for these two families of
                 lightweight block ciphers. The research work on
                 analyzing the reliability of these algorithms and
                 providing fault diagnosis approaches has not been
                 undertaken to date to the best of our knowledge. The
                 main aim of the proposed reliable architectures is to
                 provide high error coverage while maintaining
                 acceptable area and power consumption overheads. To
                 achieve this, we propose a variant of recomputing with
                 encoded operands. These low-complexity schemes are
                 suited for low-resource applications such as sensitive,
                 constrained implantable and wearable medical devices.
                 We perform fault simulations for the proposed
                 architectures by developing a fault model framework.
                 The architectures are simulated and analyzed on recent
                 field-programmable gate array (FPGA) platforms, and it
                 is shown that the proposed schemes provide high error
                 coverage. The proposed low-complexity concurrent error
                 detection schemes are a step forward toward more
                 reliable architectures for Simon and Speck algorithms
                 in lightweight, secure applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "109",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pan:2017:EMW,
  author =       "Chen Pan and Mimi Xie and Chengmo Yang and Yiran Chen
                 and Jingtong Hu",
  title =        "Exploiting Multiple Write Modes of Nonvolatile Main
                 Memory in Embedded Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "110:1--110:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3063130",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Existing Nonvolatile Memories (NVMs) have many
                 attractive features to be the main memory of embedded
                 systems. These features include low power, high
                 density, and better scalability. Recently, Multilevel
                 Cell (MLC) NVM has gained more and more popularity as
                 it can provide a higher density than the traditional
                 Single-Level Cell (SLC) NVM. However, there are also
                 drawbacks in MLC NVM, namely, limited write endurance
                 and expensive write operation. These two drawbacks have
                 to be overcome before MLC NVM can be practically
                 adopted as the main memory. In MLC Nonvolatile Main
                 Memory (NVMM), two different types of write operations
                 with very diverse data retention times are allowed. The
                 first type maintains data for years but takes a longer
                 time to write and is detrimental to the endurance. The
                 second type maintains data for a short period but takes
                 a shorter time to write. By observing that much of the
                 data written to main memory is temporary and does not
                 need to last long during the execution of a program, in
                 this article, we propose novel task scheduling and
                 write operation selection algorithms to improve MLC
                 NVMM endurance and program efficiency. An Integer
                 Linear Programming (ILP) formulation is first proposed
                 to obtain optimal results. Since ILP takes exponential
                 time to solve, we also propose the Multiwrite
                 Mode-Aware Scheduling (MMAS) algorithm to achieve a
                 near-optimal solution in polynomial time. Additionally,
                 the Dynamical Memory Block Screening (DMS) algorithm is
                 proposed to achieve wear leveling. The experimental
                 results demonstrate that the proposed techniques can
                 greatly improve the lifetime of the MLC NVMM as well as
                 the efficiency of the program.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "110",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2017:TPR,
  author =       "Yu Li and Albert M. K. Cheng",
  title =        "Toward a Practical Regularity-based Model: The Impact
                 of Evenly Distributed Temporal Resource Partitions",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "111:1--111:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092945",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most Hierarchical Real-time Scheduling (HiRTS)
                 techniques have focused on temporal resource partitions
                 in which time units are periodically distributed.
                 Although such periodic partitions could provide great
                 flexibility for the resource-level scheduling,
                 engineers face significant obstacles when trying to
                 determine the schedulability of real-time tasks running
                 on them. The main reason is that periodic partitions
                 fail to effectively bound the difference between the
                 ideal and the actual resource allocation. To solve this
                 problem, some researchers introduced the Regular
                 Partition, a type of temporal resource partition that
                 is almost evenly distributed. Recent research has shown
                 that it achieves maximal transparency for task
                 scheduling---some classical real-time scheduling problems
                 on a regular partition can be easily transformed into
                 equivalent problems on a dedicated single resource.
                 However, the resource partitioning problem for regular
                 partitions is much more complicated than the one for
                 periodic partitions. Based on a practical two-layer
                 HiRTS platform, this article introduces MulZ (Multiple
                 Z-sequences), which is the first to solve this problem
                 with a partitioned scheduling strategy. By using a more
                 complicated approximation methodology, our experimental
                 results show that MulZ outperforms the current best
                 global scheduling algorithm on this problem. After
                 that, it compares the overall performance of the
                 periodic partition and the regular partition. We
                 conclude that the regular partition is a better choice
                 for the integration of real-time applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2017:WAF,
  author =       "Yooseong Kim and David Broman and Aviral Shrivastava",
  title =        "{WCET}-Aware Function-Level Dynamic Code Management on
                 Scratchpad Memory",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "112:1--112:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3063383",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Scratchpad memory (SPM) is a promising on-chip memory
                 choice in real-time and cyber-physical systems where
                 timing is of the utmost importance. SPM has
                 time-predictable characteristics since its data
                 movement between the SPM and the main memory is
                 entirely managed by software. One way of such
                 management is dynamic management. In dynamic management
                 of instruction SPMs, code blocks are dynamically copied
                 from the main memory to the SPM at runtime by executing
                 direct memory access (DMA) instructions. Code
                 management techniques try to minimize the overhead of
                 DMA operations by finding an allocation scheme that
                 leads to efficient utilization. In this article, we
                 present three function-level code management
                 techniques. These techniques perform allocation at the
                 granularity of functions, with the objective of
                 minimizing the impact of DMA overhead to the worst-case
                 execution time (WCET) of a given program. The first
                 technique finds an optimal mapping of each function to
                 a region using integer linear programming (ILP),
                 whereas the second technique is a polynomial-time
                 heuristic that is suboptimal. The third technique maps
                 functions directly to SPM addresses, not using regions,
                 which can further reduce the WCET. Based on ILP, it can
                 also find an optimal mapping. We evaluate our
                 techniques using the M{\"a}lardalen WCET suite, MiBench
                 suite, and proprietary automotive applications from
                 industry. The results show that our techniques can
                 significantly reduce the WCET estimates compared to
                 caches with the state-of-the-art cache analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "112",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2017:PNM,
  author =       "Guanjun Liu and Mengchu Zhou and Changjun Jiang",
  title =        "{Petri} Net Models and Collaborativeness for Parallel
                 Processes with Resource Sharing and Message Passing",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "113:1--113:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2810001",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Petri nets are widely used to model and analyse
                 concurrent systems. There exist two distinct classes of
                 Petri nets that focus on different features of
                 concurrent systems. The first one features multiple
                 parallel processes sharing a group of common resources
                 but not interacting/collaborating with each other. The
                 second one allows multiple parallel processes to
                 interact/collaborate with each other via message
                 exchange but does not share any common resources.
                 However, in many distributed environments, multiple
                 processes both interact/collaborate with each other and
                 share some common resources. To model and analyse such
                 systems, this article defines a new class of Petri nets
                 called Parallel Process Nets (P$^2$Ns) that may be
                 viewed as a generalization of the two mentioned above.
                 We propose collaborativeness and close
                 collaborativeness for P$^2$Ns. The former guarantees
                 that a modelled system is both deadlock-free and
                 livelock-free, and the latter guarantees that it is
                 deadlock-free, livelock-free, and starvation-free.
                 These concepts and ideas are illustrated through some
                 classical examples such as Producer-Consumer Problem
                 and Dining Philosophers Problem. Algorithms are
                 developed to decide them. At last, P$^2$Ns are applied
                 to the modelling and analysis of two real systems:
                 hospital information system and elevator scheduling
                 system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "113",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ciszewski:2017:EAC,
  author =       "Michal Ciszewski and Konrad Iwanicki",
  title =        "Efficient Automated Code Partitioning for
                 Microcontrollers with Switchable Memory Banks",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "114:1--114:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3055511",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Switching active memory banks at runtime allows a
                 processor with a narrow address bus to access memory
                 that exceeds ranges normally addressable via the bus.
                 Switching code memory banks is regaining interest in
                 microcontrollers for the Internet of Things (IoT),
                 which have to run continuously growing software, while
                 at the same time consuming ultra-small amounts of
                 energy. To make use of bank switching, such software
                 must be partitioned among the available banks and
                 augmented with bank-switching instructions. In contrast
                 to the augmenting, which is done automatically by a
                 compiler, today the partitioning is normally done
                 manually by programmers. However, since IoT software is
                 cross-compiled on much more powerful machines than its
                 target microcontrollers, it becomes possible to
                 partition it automatically during compilation. In this
                 article, we thus study the problem of partitioning
                 program code among banks such that the resulting
                 runtime performance of the program is maximized. We
                 prove that the problem is NP-hard and propose a
                 heuristic algorithm with a low complexity, so it
                 enables fast compilation and hence interactive software
                 development. The algorithm decomposes the problem into
                 three subproblems and introduces a heuristic for each
                 of them: (1) which pieces of code to partition, (2)
                 which of them to assign to permanently mapped banks,
                 and (3) how to divide the remaining ones among
                 switchable banks. We integrate the algorithm, together
                 with earlier ones, in an open-source compiler and test
                 the resulting solution on synthetic as well as actual
                 commercial IoT software bases, thereby demonstrating
                 its advantages and drawbacks. In particular, the
                 results show that the performance of partitions
                 produced by our algorithm comes close to that of
                 partitions created manually by programmers with expert
                 knowledge on the partitioned code.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "114",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liang:2017:EKM,
  author =       "Yun Liang and Xiuhong Li",
  title =        "Efficient Kernel Management on {GPUs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "115:1--115:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070710",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Graphics Processing Units (GPUs) have been widely
                 adopted as accelerators for compute-intensive
                 applications due to its tremendous computational power
                 and high memory bandwidth. As the complexity of
                 applications continues to grow, each new generation of
                 GPUs has been equipped with advanced architectural
                 features and more resources to sustain its performance
                 acceleration capability. Recent GPUs have been featured
                 with concurrent kernel execution, which is designed to
                 improve the resource utilization by executing multiple
                 kernels simultaneously. However, it is still a
                 challenge to find a way to manage the resources on GPUs
                 for concurrent kernel execution. Prior works only
                 achieve limited performance improvement as they do not
                 optimize the thread-level parallelism (TLP) and model
                 the resource contention for the concurrently executing
                 kernels. In this article, we design an efficient kernel
                 management framework that optimizes the performance for
                 concurrent kernel execution on GPUs. Our kernel
                 management framework contains two key components: TLP
                 modulation and cache bypassing. The TLP modulation is
                 employed to adjust the TLP for the concurrently
                 executing kernels. It consists of three parts: kernel
                 categorization, static TLP modulation, and dynamic TLP
                 modulation. The cache bypassing is proposed to mitigate
                 the cache contention by only allowing a subset of a
                 kernel's blocks to access the L1 data cache.
                 Experiments indicate that our framework can improve the
                 performance by $ 1.51 \times $ on average
                 (energy-efficiency by $ 1.39 \times $ on average),
                 compared with the default concurrent kernel execution
                 framework.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "115",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sun:2017:ESD,
  author =       "Yuliang Sun and Lanjun Wang and Chen Wang and Yu
                 Wang",
  title =        "Exploiting Stable Data Dependency in Stream Processing
                 Acceleration on {FPGAs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "116:1--116:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092950",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the unique feature of fine-grained parallelism,
                 field-programmable gate arrays (FPGAs) show great
                 potential for streaming algorithm acceleration.
                 However, the lack of a design framework, restrictions
                 on FPGAs, and ineffective tools impede the utilization
                 of FPGAs in practice. In this study, we provide a
                 design paradigm to support streaming algorithm
                 acceleration on FPGAs. We first propose an abstract
                 model to describe streaming algorithms with homogeneous
                 sub-functions (HSF) and stable data dependency (SDD),
                 which we call the HSF-SDD model. Using this model, we
                 then develop an FPGA framework, PE-Ring, that has the
                 advantages of (1) fully exploiting algorithm
                 parallelism to achieve high performance, (2) leveraging
                 block RAM to serve large scale parameters, and (3)
                 enabling flexible parameter adjustments. Based on the
                 proposed model and framework, we finally implement a
                 specific converter to generate the register-transfer
                 level representation of the PE-Ring. Experimental
                 results show that our method outperforms ordinary FPGA
                 design tools by one to two orders of magnitude.
                 Experiments also demonstrate the scalability of the
                 PE-Ring.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "116",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2017:HPI,
  author =       "Zhe Liu and Thomas P{\"o}ppelmann and Tobias Oder and
                 Hwajeong Seo and Sujoy Sinha Roy and Tim G{\"u}neysu
                 and Johann Gro{\ss}sch{\"a}dl and Howon Kim and Ingrid
                 Verbauwhede",
  title =        "High-Performance Ideal Lattice-Based Cryptography on
                 $8$-Bit {AVR} Microcontrollers",
  journal =      j-TECS,
  volume =       "16",
  number =       "4",
  pages =        "117:1--117:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092951",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 14 18:53:33 MDT 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Over recent years lattice-based cryptography has
                 received much attention due to versatile average-case
                 problems like Ring-LWE or Ring-SIS that appear to be
                 intractable by quantum computers. In this work, we
                 evaluate and compare implementations of Ring-LWE
                 encryption and the bimodal lattice signature scheme
                 (BLISS) on an 8-bit Atmel ATxmega128 microcontroller.
                 Our implementation of Ring-LWE encryption provides
                 comprehensive protection against timing side-channels
                 and takes 24.9 ms for encryption and 6.7 ms for
                 decryption. To compute a BLISS signature, our software
                 takes 317 ms and 86 ms for verification. These results
                 underline the feasibility of lattice-based cryptography
                 on constrained devices.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "117",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Malik:2017:MCH,
  author =       "Avinash Malik and Partha S. Roop and Sidharta Andalam
                 and Mark Trew and Michael Mendler",
  title =        "Modular Compilation of Hybrid Systems for Emulation
                 and Large Scale Simulation",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "118:1--118:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126536",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Hybrid systems combine discrete controllers with
                 adjoining physical processes. While many approaches
                 exist for simulating hybrid systems, there are few
                 approaches for their emulation, especially when the
                 actual physical plant is not available. This paper
                 develops the first formal framework for emulation along
                 with a new compiler that enables large-scale (1000+
                 components) simulation. We propose a formal model
                 called Synchronous Emulation Automaton (SEA)
                 specifically for modular compilation and parallel
                 execution. SEA combines Linear Time Invariant (LTI)
                 systems with discrete mode switches and has the
                 following semantic differences with Hybrid Automata:
                 (1) the Ordinary Differential Equations are solved
                 analytically and the solutions are sampled at the
                 Worst-Case Reaction Time of the model and (2) we
                 develop a new composition semantics, which allows
                 individual SEAs to execute in parallel with each other.
                 The proposed semantics eliminates: (a) the need for
                 dynamic numerical solvers, and (b) the Zeno-phenomenon
                 by construction. Experimental results show that process
                 models designed using our tool (Piha) give a 3.6 times
                 execution speedup over Simulink\reg, and up to 26 times
                 speedup on manycore architectures.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "118",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Blindell:2017:CPU,
  author =       "Gabriel Hjort Blindell and Mats Carlsson and Roberto
                 Casta{\~n}eda Lozano and Christian Schulte",
  title =        "Complete and Practical Universal Instruction
                 Selection",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "119:1--119:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126528",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In code generation, instruction selection chooses
                 processor instructions to implement a program under
                 compilation where code quality crucially depends on the
                 choice of instructions. Using methods from
                 combinatorial optimization, this paper proposes an
                 expressive model that integrates global instruction
                 selection with global code motion. The model introduces
                 (1) handling of memory computations and function calls,
                 (2) a method for inserting additional jump instructions
                 where necessary, (3) a dependency-based technique to
                 ensure correct combinations of instructions, (4) value
                 reuse to improve code quality, and (5) an objective
                 function that reduces compilation time and increases
                 scalability by exploiting bounding techniques. The
                 approach is demonstrated to be complete and practical,
                 competitive with LLVM, and potentially optimal (w.r.t.
                 the model) for medium-sized functions. The results show
                 that combinatorial optimization for instruction
                 selection is well-suited to exploit the potential of
                 modern processors in embedded systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "119",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Su:2017:EWA,
  author =       "Xuesong Su and Hui Wu and Jingling Xue",
  title =        "An Efficient {WCET}-Aware Instruction Scheduling and
                 Register Allocation Approach for Clustered {VLIW}
                 Processors",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "120:1--120:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126524",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In real-time embedded system design, one major goal is
                 to construct a feasible schedule. Whether a feasible
                 schedule exists depends on the Worst-Case Execution
                 Time (WCET) of each task. Consequently, it is important
                 to minimize the WCET of each task. We investigate the
                 problem of instruction scheduling and register
                 allocation for a program executed on a clustered Very
                 Long Instruction Word (VLIW) processor such that the
                 WCET of the program is minimized, and propose a novel,
                 unified instruction scheduling and register allocation
                 heuristic approach. Our heuristic approach is
                 underpinned by a set of novel techniques, including
                 spanning graph-based WCET-aware live range splitting,
                 WCET-aware dynamic register pressure control,
                 WCET-aware basic block prioritization for performing
                 integrated instruction scheduling and register
                 allocation, and WCET-aware spill code handling. We have
                 implemented our approach in Trimaran 4.0, and compared
                 it with the state-of-the-art approach by using a set of
                 20 benchmarks. The experimental results show that our
                 approach achieves the maximum WCET improvement of
                 29.61\% and the average WCET improvement of 10.23\%,
                 respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "120",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Micolet:2017:SDP,
  author =       "Paul-Jules Micolet and Aaron Smith and Christophe
                 Dubach",
  title =        "A Study of Dynamic Phase Adaptation Using a Dynamic
                 Multicore Processor",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "121:1--121:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126523",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Heterogeneous processors such as ARM's big.LITTLE have
                 become popular for embedded systems. They offer a
                 choice between running workloads on a high performance
                 core or a low-energy core leading to increased energy
                 efficiency. However, the core configurations are fixed
                 at design time which offers a limited amount of
                 adaptation. Dynamic Multicore Processors (DMPs) bridge
                 the gap between homogeneous and fully reconfigurable
                 systems. Cores can fuse dynamically to adapt the
                 computational resources to the needs of different
                 workloads. There exist multiple examples of DMPs in
                 the literature, yet the focus has mainly been on static
                 partitioning. This paper conducts the first thorough
                 study of the potential for dynamic reconfiguration of
                 DMPs at runtime. We study how performance varies with
                 static partitioning and what software optimizations are
                 required to achieve high performance. We show that
                 energy consumption is reduced considerably when
                 adapting the number of cores to program phases, and
                 introduce a simple online model which predicts the
                 optimal number of cores to use to minimize energy
                 consumption while maintaining high performance. Using
                 the San Diego Vision Benchmark Suite as a use case, the
                 dynamic scheme leads to $ \approx $40\% energy savings
                 on average without decreasing performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "121",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Trub:2017:IPM,
  author =       "Roman Tr{\"u}b and Georgia Giannopoulou and Andreas
                 Tretter and Lothar Thiele",
  title =        "Implementation of Partitioned Mixed-Criticality
                 Scheduling on a Multi-Core Platform",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "122:1--122:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126533",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent industrial trends favor the adoption of
                 multi-core architectures for mixed-criticality
                 applications. Although several mixed-criticality
                 multi-core scheduling approaches have been proposed,
                 currently there are few implementations on hardware
                 that demonstrate efficient resource utilization and the
                 ability to bound interference on shared resources. To
                 address this necessity, we develop a mixed-criticality
                 runtime environment on the Kalray MPPA-256 Andey
                 many-core platform. The runtime environment implements
                 a scheduling policy based on adaptive temporal
                 partitioning. We develop models, methods and
                 implementation principles to implement the necessary
                 scheduling primitives, to achieve high platform
                 utilization and to perform a compositional worst-case
                 execution time analysis. The bounds account for
                 scheduling overheads and for the inter-task
                 interference on the platform's shared memory. Using
                 realistic benchmarks from avionics and signal
                 processing, we validate the correctness and tightness
                 of the bounds and demonstrate a high platform
                 utilization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "122",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gupta:2017:DDP,
  author =       "Ujjwal Gupta and Chetan Arvind Patil and Ganapati Bhat
                 and Prabhat Mishra and Umit Y. Ogras",
  title =        "{DyPO}: Dynamic {Pareto}-Optimal Configuration
                 Selection for Heterogeneous {MpSoCs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "123:1--123:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126530",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modern multiprocessor systems-on-chip (MpSoCs) offer
                 tremendous power and performance optimization
                 opportunities by tuning thousands of potential voltage,
                 frequency and core configurations. As the workload
                 phases change at runtime, different configurations may
                 become optimal with respect to power, performance or
                 other metrics. Identifying the optimal configuration at
                 runtime is infeasible due to the large number of
                 workloads and configurations. This paper proposes a
                 novel methodology that can find the Pareto-optimal
                 configurations at runtime as a function of the
                 workload. To achieve this, we perform an extensive
                 offline characterization to find classifiers that map
                 performance counters to optimal configurations. Then,
                 we use these classifiers and performance counters at
                 runtime to choose Pareto-optimal configurations. We
                 evaluate the proposed methodology by maximizing the
                 performance per watt for 18 single- and multi-threaded
                 applications. Our experiments demonstrate an average
                 increase of 93\%, 81\% and 6\% in performance per watt
                 compared to the interactive, on demand and powersave
                 governors, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "123",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Naresh:2017:CCC,
  author =       "Vignyan Reddy Kothinti Naresh and Dibakar Gope and
                 Mikko H. Lipasti",
  title =        "The {CURE}: Cluster Communication Using Registers",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "124:1--124:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126527",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "VLIW processors typically deliver high performance on
                 limited budget making them ideal for a variety of
                 communication and signal processing solutions. These
                 processors typically need large multi-ported register
                 files that can have side effects of increased cycle
                 time and high power consumption. The access delay and
                 energy of these register files can also become
                 prohibitive when increasing the register count or the
                 access ports, thus limiting the overall performance of
                 the processor. Most prior art circumvent this problem
                 by using multiple clusters with private register files,
                 to lower the access delay and reduce energy
                 consumption. However, clustering artifacts, like
                 increased inter-cluster communication operations and
                 spill-recovery code, result in a performance penalty.
                 This paper proposes CURE --- a novel technique to
                 considerably reduce the negative effects of clustering.
                 CURE augments the ISA to expose the communication
                 registers to the compilers to increase availability of
                 architectural register state to all functional units.
                 The inter-cluster communication operations are
                 integrated into regular ALU and memory operations to
                 improve instruction encoding efficiency. We also
                 propose a new code scheduling heuristic to handle the
                 ISA changes, and to realize the improvements in
                 processor's performance and energy consumption. Our
                 quantitative analysis estimates that CURE, when
                 compared to the baseline 8-issue uni-cluster
                 processor, boosts average performance by 61\% while
                 reducing the average register dynamic energy by 77\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "124",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Josipovic:2017:OLS,
  author =       "Lana Josipovic and Philip Brisk and Paolo Ienne",
  title =        "An Out-of-Order Load-Store Queue for Spatial
                 Computing",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "125:1--125:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126525",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The efficiency of spatial computing depends on the
                 ability to achieve maximal parallelism. This
                 necessitates memory interfaces that can correctly
                 handle memory accesses that arrive in arbitrary order
                 while still respecting data dependencies and ensuring
                 appropriate ordering for semantic correctness. However,
                 a typical memory interface for out-of-order processors
                 (i.e., a load-store queue) cannot immediately meet
                 these requirements: a different allocation policy is
                 needed to achieve out-of-order execution in spatial
                 systems that naturally omit the notion of sequential
                 program order, a fundamental piece of information for
                 correct execution. We show a novel and practical way to
                 organize the allocation for an out-of-order load-store
                 queue for spatial computing. The main idea is to
                 dynamically allocate groups of memory accesses
                 (depending on the dynamic behavior of the application),
                 where the access order within the group is statically
                 predetermined (for instance by a high-level synthesis
                 tool). We detail the construction of our load-store
                 queue and demonstrate on a few practical cases its
                 advantages over standard accelerator-memory
                 interfaces.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "125",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Crites:2017:DCE,
  author =       "Brian Crites and Karen Kong and Philip Brisk",
  title =        "Diagonal Component Expansion for Flow-Layer Placement
                 of Flow-Based Microfluidic Biochips",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "126:1--126:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126529",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Continuous flow-based microfluidic devices have seen a
                 huge increase in interest because of their ability to
                 automate and miniaturize biochemistry and biological
                 processes, as well as their promise of creating a
                 programmable platform for chemical and biological
                 experimentation. The major hurdle in the adoption of
                 these types of devices is in the design, which is
                 largely done by hand using tools such as AutoCAD or
                 SolidWorks, which require immense domain knowledge and
                 are hard to scale. This paper investigates the problem
                 of automated physical design for continuous flow-based
                 microfluidic very large scale integration (mVLSI)
                 biochips, starting from a netlist specification of the
                 flow layer. After an initial planar graph embedding,
                 vertices in the netlist are expanded into
                 two-dimensional components, followed by fluid channel
                 routing. A new heuristic, DIagonal Component Expansion
                 (DICE) is introduced for the component expansion step.
                 Compared to a baseline expansion method, DICE improves
                 area utilization by a factor of 8.90x and reduces
                 average fluid routing channel length by 47.4\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "126",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Elfar:2017:SER,
  author =       "Mahmoud Elfar and Zhanwei Zhong and Zipeng Li and
                 Krishnendu Chakrabarty and Miroslav Pajic",
  title =        "Synthesis of Error-Recovery Protocols for
                 Micro-Electrode-Dot-Array Digital Microfluidic
                 Biochips",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "127:1--127:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126538",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A digital microfluidic biochip (DMFB) is an attractive
                 technology platform for various biomedical
                 applications. However, a conventional DMFB is limited
                 by: (i) the number of electrical connections that can
                 be practically realized, (ii) constraints on droplet
                 size and volume, and (iii) the need for special
                 fabrication processes and the associated
                 reliability/yield concerns. To overcome the above
                 challenges, DMFBs based on a micro-electrode-dot-array
                 (MEDA) architecture have been proposed and fabricated
                 recently. Error recovery is of key interest for MEDA
                 biochips due to the need for system reliability. Errors
                 are likely to occur during droplet manipulation due to
                 defects, chip degradation, and the uncertainty inherent
                 in biochemical experiments. In this paper, we first
                 formalize error-recovery objectives, and then
                 synthesize optimal error-recovery protocols using a
                 model based on Stochastic Multiplayer Games (SMGs). We
                 also present a global error-recovery technique that can
                 update the schedule of fluidic operations in an
                 adaptive manner. Using three representative real-life
                 bioassays, we show that the proposed approach can
                 effectively reduce the bioassay completion time and
                 increase the probability of success for error
                 recovery.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "127",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gottscho:2017:LCM,
  author =       "Mark Gottscho and Irina Alam and Clayton Schoeny and
                 Lara Dolecek and Puneet Gupta",
  title =        "Low-Cost Memory Fault Tolerance for {IoT} Devices",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "128:1--128:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126534",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "IoT devices need reliable hardware at low cost. It is
                 challenging to efficiently cope with both hard and soft
                 faults in embedded scratchpad memories. To address this
                 problem, we propose a two-step approach: FaultLink and
                 Software-Defined Error-Localizing Codes (SDELC).
                 FaultLink avoids hard faults found during testing by
                 generating a custom-tailored application binary image
                 for each individual chip. During software
                 deployment-time, FaultLink optimally packs small
                 sections of program code and data into fault-free
                 segments of the memory address space and generates a
                 custom linker script for a lazy-linking procedure.
                 During run-time, SDELC deals with unpredictable soft
                 faults via novel and inexpensive Ultra-Lightweight
                 Error-Localizing Codes (UL-ELCs). These require fewer
                 parity bits than single-error-correcting Hamming codes.
                 Yet our UL-ELCs are more powerful than basic
                 single-error-detecting parity: they localize single-bit
                 errors to a specific chunk of a codeword. SDELC then
                 heuristically recovers from these localized errors
                 using a small embedded C library that exploits
                 observable side information (SI) about the
                 application's memory contents. SI can be in the form of
                 redundant data (value locality), legal/illegal
                 instructions, etc. Our combined FaultLink+SDELC
                 approach improves min-VDD by up to 440 mV and correctly
                 recovers from up to 90\% (70\%) of random single-bit
                 soft faults in data (instructions) with just three
                 parity bits per 32-bit word.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "128",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yantir:2017:AMM,
  author =       "Hasan Erdem Yantir and Ahmed M. Eltawil and Fadi J.
                 Kurdahi",
  title =        "Approximate Memristive In-memory Computing",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "129:1--129:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126526",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The bottleneck between the processing elements and
                 memory is the biggest issue contributing to the
                 scalability problem in computing. In-memory computation
                 is an alternative approach that combines memory and
                 processor in the same location, and eliminates the
                 potential memory bottlenecks. Associative processors
                 are a promising candidate for in-memory computation,
                 however the existing implementations have been deemed
                 too costly and power hungry. Approximate computing is
                 another promising approach for energy-efficient digital
                 system designs where it sacrifices the accuracy for the
                 sake of energy reduction and speedup in error-resilient
                 applications. In this study, approximate in-memory
                 computing is introduced in memristive associative
                 processors. Two approximate computing methodologies are
                 proposed: bit trimming and memristance scaling. Results
                 show that the proposed methods not only reduce energy
                 consumption of in-memory parallel computing but also
                 improve their performance. As compared to other
                 existing approximate computing methodologies on
                 different architectures (e.g., CPU, GPU, and ASIC),
                 approximate memristive in-memory computing exhibits
                 better results in terms of energy reduction (up to 80x)
                 and speedup (up to 20x) on a variety of benchmarks from
                 different domains when quality degradation is limited
                 to 10\% and it confirms that memristive associative
                 processors provide a highly-promising platform for
                 approximate computing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "129",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Raha:2017:QIA,
  author =       "Arnab Raha and Vijay Raghunathan",
  title =        "{qLUT}: Input-Aware Quantized Table Lookup for
                 Energy-Efficient Approximate Accelerators",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "130:1--130:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126531",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Approximate computing has emerged as a popular design
                 paradigm for optimizing the performance and energy
                 consumption of error-resilient applications in domains
                 such as machine learning, graphics, data analytics,
                 etc. Numerous techniques for approximate computing have
                 been proposed at different layers of the system stack,
                 from circuits to architecture to software. In this
                 work, we propose a new technique, called quantized
                 table lookup, for approximating the meta-functions used
                 in the core computational kernels of error-resilient
                 applications. In contrast to prior work that directly
                 approximates the functionality of the meta-functions,
                 the proposed technique instead approximates the input
                 data to the meta-functions by reducing/quantizing them
                 to a much smaller set of values that we call quantized
                 inputs. The small number of quantized inputs enables us
                 to completely replace the energy-intensive arithmetic
                 units in the meta-function with small and
                 energy-efficient lookup tables (called quantized lookup
                 tables or qLUT) that contain precomputed output values
                 corresponding to the quantized inputs. The proposed
                 approximation technique is not only highly generic, but
                 also inherently quality-configurable and input-aware.
                 Quality-configurability and input-awareness are
                 achieved by modulating the size of the qLUT as well as
                 selecting the values of the quantized inputs
                 judiciously based on the statistics of the original
                 input data. To evaluate the proposed technique, we have
                 implemented the dominant meta-functions of nine
                 error-resilient application benchmarks as quantized
                 table lookup based hardware accelerators using 45nm
                 technology. Experimental results demonstrate average
                 energy savings of 46\% at the application-level for
                 minimal ($<$ 1\%) loss in output quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "130",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Egilmez:2017:UAF,
  author =       "Begum Egilmez and Matthew Schuchhardt and Gokhan Memik
                 and Raid Ayoub and Niranjan Soundararajan and Michael
                 Kishinevsky",
  title =        "User-aware Frame Rate Management in {Android}
                 Smartphones",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "131:1--131:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126539",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Frame rate has a direct impact on the energy
                 consumption of smartphones: the higher the frame rate,
                 the higher the power consumption. Hence, reducing
                 display refreshes will reduce the power consumption.
                 However, it is risky to manipulate frame rate
                 drastically as it can deteriorate user satisfaction
                 with the device. In this work, we introduce a screen
                 management system that controls the frame rate on
                 smartphone displays based on a model that detects user
                 dissatisfaction due to display refreshes. This approach
                 is based on understanding when higher frame rates are
                 necessary, and providing lower frame rates --- thus,
                 saving power --- if the lower rate is predicted not to
                 cause user dissatisfaction. According to the results of
                 our first user survey with 20 participants, individuals
                 show highly varying requirements: while some users
                 require high frame rates for the highest satisfaction,
                 others are equally satisfied with lower frame rates.
                 Based on this observation, we develop a system that
                 predicts user dissatisfaction on the runtime and either
                 increases or decreases the maximum frame rate setting.
                 For user dissatisfaction predictions, we have compared
                 two different approaches: (1) static model, which uses
                 dissatisfaction characteristics of a fixed group of
                 people, and (2) user-specific model, which is learning
                 only from the specific user. Our second set of
                 experiments with 20 participants shows that users
                 report 32\% less dissatisfaction and 4\% more
                 dissatisfaction than the default Android system with
                 user-specific and static systems, respectively. These
                 experiments also show that, compared to the default
                 scheme, our mechanisms reduce the power consumption of
                 the phone by 7.2\% and 1.8\% on average with the
                 user-specific and static models, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "131",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yan:2017:FFI,
  author =       "Hao Yan and Lei Jiang and Lide Duan and Wei-Ming Lin
                 and Eugene John",
  title =        "{FlowPaP} and {FlowReR}: Improving Energy Efficiency
                 and Performance for {STT-MRAM}-Based Handheld Devices
                 under Read Disturbance",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "132:1--132:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126532",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Handheld devices, such as smartphones and tablets,
                 currently dominate the semiconductor market. The memory
                 access patterns of CPU and IP cores are dramatically
                 different in a handheld device, making the main memory
                 a critical bottleneck of the entire system. As a
                 result, non-volatile memories, such as spin transfer
                 torque magnetoresistive random-access memory
                 (STT-MRAM), are emerging as a replacement for the
                 existing DRAM-based main memory, achieving a wide
                 variety of advantages. However, replacing DRAM with
                 STT-MRAM also results in new design challenges
                 including read disturbance. A simple read-and-restore
                 scheme preserves data integrity under read disturbance,
                 but incurs significant performance and energy
                 overheads. Consequently, by utilizing unique
                 characteristics of mobile applications, we propose
                 FlowPaP, a flow pattern prediction scheme to
                 dynamically predict the write-to-last-read distances
                 for data frames running on a handheld device. FlowPaP
                 identifies and removes unnecessary memory restores
                 originally required for preventing read disturbance,
                 significantly improving energy efficiency and
                 performance for STT-MRAM-based handheld devices. In
                 addition, we propose a flow-based data retention time
                 reduction scheme named FlowReR to further lower energy
                 consumption of STT-MRAM at the expense of reducing its
                 data retention time. FlowReR imposes a second step that
                 marginally trades off the already improved energy
                 efficiency for performance improvements. Experimental
                 results show that, compared to the original
                 read-and-restore scheme, the application of FlowPaP and
                 FlowReR together can simultaneously improve energy
                 efficiency by 34\% and performance by 17\% for a set of
                 commonly used Android applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "132",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rai:2017:UCG,
  author =       "Siddharth Rai and Mainak Chaudhuri",
  title =        "Using Criticality of {GPU} Accesses in Memory
                 Management for {CPU--GPU} Heterogeneous Multi-Core
                 Processors",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "133:1--133:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126540",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Heterogeneous chip-multiprocessors with CPU and GPU
                 integrated on the same die allow sharing of critical
                 memory system resources among the CPU and GPU
                 applications. Such architectures give rise to
                 challenging resource scheduling problems. In this
                 paper, we explore memory access scheduling algorithms
                 driven by criticality of GPU accesses in such systems.
                 Different GPU access streams originate from different
                 parts of the GPU rendering pipeline, which behaves very
                 differently from the typical CPU pipeline requiring new
                 techniques for GPU access criticality estimation. We
                 propose a novel queuing network model to estimate the
                 performance-criticality of the GPU access streams. If a
                 GPU application performs below the quality of service
                 requirement (e.g., frame rate in 3D scene rendering),
                 the memory access scheduler uses the estimated
                 criticality information to accelerate the critical GPU
                 accesses. Detailed simulations done on a heterogeneous
                 chip-multiprocessor model with one GPU and four CPU
                 cores running heterogeneous mixes of DirectX, OpenGL,
                 and CPU applications show that our proposal improves
                 the GPU performance by 15\% on average without
                 degrading the CPU performance much. Extensions proposed
                 for the mixes containing GPGPU applications, which do
                 not have any quality of service requirement, improve
                 the performance by 7\% on average for these mixes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "133",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kang:2017:RLA,
  author =       "Wonkyung Kang and Dongkun Shin and Sungjoo Yoo",
  title =        "Reinforcement Learning-Assisted Garbage Collection to
                 Mitigate Long-Tail Latency in {SSD}",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "134:1--134:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126537",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "NAND flash memory is widely used in various systems,
                 ranging from real-time embedded systems to enterprise
                 server systems. Because the flash memory has
                 erase-before-write characteristics, we need
                 flash-memory management methods, i.e., address
                 translation and garbage collection. In particular,
                 garbage collection (GC) incurs long-tail latency, e.g.,
                 100 times higher latency than the average latency at
                 the 99$^{\mathrm{th}}$ percentile. Thus, real-time and
                 quality-critical systems fail to meet the given
                 requirements such as deadline and QoS constraints. In
                 this study, we propose a novel method of GC based on
                 reinforcement learning. The objective is to reduce the
                 long-tail latency by exploiting the idle time in the
                 storage system. To improve the efficiency of the
                 reinforcement learning-assisted GC scheme, we present
                 new optimization methods that exploit fine-grained GC
                 to further reduce the long-tail latency. The
                 experimental results with real workloads show that our
                 technique significantly reduces the long-tail latency
                 by 29--36\% at the 99.99$^{\mathrm{th}}$ percentile
                 compared to state-of-the-art schemes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "134",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tretter:2017:MAC,
  author =       "Andreas Tretter and Georgia Giannopoulou and Matthias
                 Baer and Lothar Thiele",
  title =        "Minimising Access Conflicts on Shared Multi-Bank
                 Memory",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "135:1--135:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126535",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A common multi-core pattern consists of processors
                 communicating through shared, multi-banked on-chip
                 memory. Two approaches exist: Interleaved address
                 mapping, which spreads consecutive data over all banks,
                 and contiguous address mapping, which stores
                 consecutive data on a single bank. In this work, we
                 compare both approaches on the Kalray MPPA-256
                 platform. For contiguous mapping, we propose an
                 algorithm, based on graph colouring techniques, to
                 automatically perform the assignment of data blocks to
                 memory banks with the goal of minimising access
                 collisions and delays. Experiments with representative,
                 parallel real-world benchmarks show that 69\% of the
                 tested configurations, when optimised for contiguous
                 mapping by our algorithm, run up to 86\% faster on
                 average than with interleaved mapping.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "135",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Riazi:2017:CSC,
  author =       "M. Sadegh Riazi and Mohammad Samragh and Farinaz
                 Koushanfar",
  title =        "{CAMsure}: Secure Content-Addressable Memory for
                 Approximate Search",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "136:1--136:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126547",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We introduce CAMsure, the first realization of secure
                 Content Addressable Memory (CAM) in the context of
                 approximate search using near-neighbor algorithms.
                 CAMsure provides a lightweight solution for practical
                 secure (approximate) search with a minimal drop in the
                 accuracy of the search results. CAM has traditionally
                 been used as a hardware search engine that explores the
                 entire memory in a single clock cycle. However, there
                 has been little attention to the security of the data
                 stored in CAM. Our approach stores distance-preserving
                 hash embeddings within CAM to ensure data privacy. The
                 hashing method provides data confidentiality while
                 preserving similarity in the sense that a high
                 resemblance in the data domain is translated to a small
                 Hamming distance in the hash domain. Consequently, the
                 objective of near-neighbor search is converted to
                 approximate lookup table search which is compatible
                 with the realizations of emerging content addressable
                 memories. Our methodology delivers on average two
                 orders of magnitude faster response time compared to
                 RAM-based solutions that preserve the privacy of data
                 owners.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "136",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Piccolboni:2017:ECF,
  author =       "Luca Piccolboni and Alessandro Menon and Graziano
                 Pravadelli",
  title =        "Efficient Control-Flow Subgraph Matching for Detecting
                 Hardware {Trojans} in {RTL} Models",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "137:1--137:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126552",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Only few solutions for Hardware Trojan (HT) detection
                 work at Register-Transfer Level (RTL), thus delaying
                 the identification of possible security issues at lower
                 abstraction levels of the design process. In addition,
                 most of the existing approaches work only for specific
                 kinds of HTs. To overcome these limitations, we present
                 a verification approach that detects different types of
                 HTs in RTL models by exploiting an efficient
                 control-flow subgraph matching algorithm. The
                 prototypes of HTs that can be detected are modelled in
                 a library by using Control-Flow Graphs (CFGs) that can
                 be parametrised and extended to cover several variants
                 of Trojan patterns. Experimental results show that our
                 approach is effective and efficient in comparison with
                 other state-of-the-art solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "137",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Migliore:2017:HSA,
  author =       "Vincent Migliore and C{\'e}dric Seguin and Maria
                 M{\'e}ndez Real and Vianney Lapotre and Arnaud
                 Tisserand and Caroline Fontaine and Guy Gogniat and
                 Russell Tessier",
  title =        "A High-Speed Accelerator for Homomorphic Encryption
                 using the {Karatsuba} Algorithm",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "138:1--138:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126558",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Somewhat Homomorphic Encryption (SHE) schemes can be
                 used to carry out operations on ciphered data. In a
                 cloud computing scenario, personal information can be
                 processed secretly, inferring a high level of
                 confidentiality. The principal limitation of SHE is the
                 size of ciphertext compared to the size of the message.
                 This issue can be addressed by using a batching
                 technique that ``packs'' several messages into one
                 ciphertext. However, this method leads to important
                 drawbacks in standard implementations. This paper
                 presents a fast hardware/software co-design
                 implementation of an encryption procedure using the
                 Karatsuba algorithm. Our hardware accelerator is 1.5
                 times faster than the state of the art for 1 encryption
                 and 4 times faster for 4 encryptions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "138",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2017:FAK,
  author =       "Jiacheng Zhang and Youyou Lu and Jiwu Shu and Xiongjun
                 Qin",
  title =        "{FlashKV}: Accelerating {KV} Performance with
                 Open-Channel {SSDs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "139:1--139:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126545",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As the cost-per-bit of solid state disks is decreasing
                 quickly, SSDs are supplanting HDDs in many cases,
                 including the primary storage of key-value stores.
                 However, simply deploying LSM-tree-based key-value
                 stores on commercial SSDs is inefficient and induces
                 heavy write amplification and severe garbage collection
                 overhead under write-intensive conditions. The main
                 cause of these critical issues comes from the triple
                 redundant management functionalities lying in the
                 LSM-tree, file system and flash translation layer,
                 which block the awareness between key-value stores and
                 flash devices. Furthermore, we observe that the
                 performance of LSM-tree-based key-value stores is
                 improved little by only eliminating these redundant
                 layers, as the I/O stacks, including the cache and
                 scheduler, are not optimized for LSM-tree's unique I/O
                 patterns. To address the issues above, we propose
                 FlashKV, an LSM-tree based key-value store running on
                 open-channel SSDs. FlashKV eliminates the redundant
                 management and semantic isolation by directly managing
                 the raw flash devices in the application layer. With
                 the domain knowledge of LSM-tree and the open-channel
                 information, FlashKV employs a parallel data layout to
                 exploit the internal parallelism of the flash device,
                 and optimizes the compaction, caching and I/O
                 scheduling mechanisms specifically. Evaluations show
                 that FlashKV effectively improves system performance by
                 $ 1.5 \times $ to $ 4.5 \times $ and decreases up to
                 50\% write traffic under heavy write conditions,
                 compared to LevelDB.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "139",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2017:PBB,
  author =       "Hong Seok Kim and Eyee Hyun Nam and Ji Hyuck Yun and
                 Sheayun Lee and Sang Lyul Min",
  title =        "{P-BMS}: a Bad Block Management Scheme in Parallelized
                 Flash Memory Storage Devices",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "140:1--140:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126550",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Flash memory is used as a main data storage medium in
                 increasingly large areas of applications, rapidly
                 replacing hard disk drives because of its low power
                 consumption, fast random access, and high shock
                 resistance. Such flash-based storage devices generally
                 incorporate multiple flash memory chips to meet the
                 ever growing capacity demands. Using multiple chips in
                 a single storage device, at the same time, opens an
                 opportunity to boost the performance based on
                 multi-unit parallelism. However, parallel execution of
                 multiple flash operations introduces complications when
                 bad blocks occur, which is unavoidable due to flash
                 memory's physical characteristics. The situation gets
                 even worse when bad block occurrences are accompanied
                 by sudden power failures. We propose a bad block
                 management scheme called P-BMS that can fully utilize
                 flash-level parallelism, while guaranteeing provably
                 correct block replacement. Experiments show that our
                 P-BMS achieves a throughput that is more than 95\% of
                 the maximum bandwidth of the flash controller, even
                 with bad block occurrences far heavier than in real
                 flash memory.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "140",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2017:PIE,
  author =       "Fei Wu and Meng Zhang and Yajuan Du and Xubin He and
                 Ping Huang and Changsheng Xie and Jiguang Wan",
  title =        "A Program Interference Error Aware {LDPC} Scheme for
                 Improving {NAND} Flash Decoding Performance",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "141:1--141:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126563",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "By scaling down to smaller cell size, NAND flash has
                 significantly increased the storage capacity in order
                 to lower the unit cost down. However, the reliability
                 is sacrificed due to much higher raw bit error rates.
                 As a result, conventional error correction codes
                 (ECCs), such as BCH codes, are not sufficient.
                 Low-density parity check (LDPC) codes with stronger
                 error correction capability are adopted in NAND flash
                 to guarantee data reliability. However, read
                 performance using LDPC is poor because of its decoding
                 complexity. It has been found that flash cells with
                 fewer electrons are more prone to program interference
                 errors. As a result, program interference errors show
                 the characteristic of value dependence. This
                 characteristic can be exploited and translated into
                 extra information facilitating the decoding
                 convergence. Motivated by this observation, we propose
                 PEAL: a flash program interference error aware LDPC
                 scheme to enhance the decoding performance. PEAL
                 integrates the obtained extra information from the
                 value dependence into the soft-to-hard decision process
                 in LDPC decoding to decrease decoding iterations and
                 improve the decoding convergence speed. Simulation
                 results show that decoding iterations are reduced by up
                 to 69.37\% and the decoding convergence speed is
                 improved by up to $ 2.5 \times $, compared with the
                 normalized min-sum (NMS) algorithm with 2KB information
                 lengths at an approximate raw bit error rate of $ 11.5
                 \times 10^{-3} $.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "141",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2017:PAP,
  author =       "Yi Wang and Lisha Dong and Rui Mao",
  title =        "{P-Alloc}: Process-Variation Tolerant Reliability
                 Management for {$3$D} Charge-Trapping Flash Memory",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "142:1--142:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126554",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Three-dimensional (3D) flash memory is an emerging
                 memory technology that enables a number of improvements
                 to conventional planar NAND flash memory, including
                 larger capacity, less program disturbance, and lower
                 access latency. In contrast to conventional planar
                 flash memory, 3D flash memory adopts charge-trapping
                 mechanism. NAND strings punch through multiple stacked
                 layers to form the three-dimensional infrastructure.
                 However, the etching processes for NAND strings are
                 unable to produce perfectly vertical features,
                 especially on the scale of 20 nanometers or less. The
                 process variation will cause uneven distribution of
                 electrons, which poses a threat to the integrity of
                 data stored in flash. This paper presents P-Alloc, a
                 process-variation tolerant reliability management
                 strategy for 3D charge-trapping flash memory. P-Alloc
                 offers both hardware and software support to allocate
                 data to the 3D flash in the presence of process
                 variation. P-Alloc predicts the state of a physical
                 page, i.e., the basic unit for each write or read
                 operation in flash memory, and tries to assign critical
                 data to more reliable pages. A hardware-based voltage
                 threshold compensation scheme is also proposed to
                 further reduce the faults. We demonstrate the viability
                 of the proposed scheme using a variety of realistic
                 workloads. Our extensive evaluations show that, P-Alloc
                 significantly enhances the reliability and reduces the
                 access latency compared to the baseline scheme.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "142",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tan:2017:ASA,
  author =       "Benjamin Tan and Morteza Biglari-Abhari and Zoran
                 Salcic",
  title =        "An Automated Security-Aware Approach for Design of
                 Embedded Systems on {MPSoC}",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "143:1--143:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126553",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "MPSoC-based embedded systems design is becoming
                 increasingly complex. Not only do we need to satisfy
                 multiple design objectives, we increasingly need to
                 address potential security risks. In this work, we
                 propose a security-aware systematic design approach
                 which explores the design space, given a system-level
                 application description, by generating potential
                 architecture configurations of execution platform nodes
                 that are interconnected using a NoC. We then perform
                 automated security analysis to check the generated
                 configurations against designer-specified security
                 constraints. Following the analysis, we use an
                 automated architecture configuration refinement process
                 to generate a list of security additions that are
                 inserted into the initial configuration so that the
                 security constraints are satisfied. By performing this
                 refinement on several candidate configuration options,
                 we can explore the trade-off between resource cost and
                 security. In this paper, we illustrate the proposed
                 approach using a Smart Home Control System
                 application.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "143",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tsoutsouras:2017:SSO,
  author =       "Vasileios Tsoutsouras and Dimosthenis Masouros and
                 Sotirios Xydis and Dimitrios Soudris",
  title =        "{SoftRM}: Self-Organized Fault-Tolerant Resource
                 Management for Failure Detection and Recovery in {NoC}
                 Based Many-Cores",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "144:1--144:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126562",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many-core systems are envisioned to leverage the
                 ever-increasing demand for more powerful computing
                 systems. To provide the necessary computing power, the
                 number of Processing Elements integrated on-chip
                 increases and NoC based infrastructures are adopted to
                 address the interconnection scalability. The advent of
                 these new architectures surfaces the need for more
                 sophisticated, distributed resource management
                 paradigms, which in addition to the extreme integration
                 scaling, make the new systems more prone to errors
                 manifested both at hardware and software. In this work,
                 we highlight the need for Run-Time Resource management
                 to be enhanced with fault tolerance features and
                 propose SoftRM, a resource management framework which
                 can dynamically adapt to permanent failures in a
                 self-organized, workload-aware manner.
                 Self-organization allows the resource management agents
                 to recover from a failure in a coordinated way by
                 electing a new agent to replace the failed one, while
                 workload awareness optimizes this choice according to
                 the status of each core. We evaluate the proposed
                 framework on Intel Single-chip Cloud Computer (SCC), a
                 NoC based many-core system and customize it to achieve
                 minimum interference on the resource allocation
                 process. We showcase that its workload-aware features
                 manage to utilize free resources in more than 90\% of
                 the conducted experiments. Comparison with relevant
                 state-of-the-art fault tolerant frameworks shows
                 decrease of up to 67\% in the imposed overhead on
                 application execution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "144",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bhat:2017:PTS,
  author =       "Ganapati Bhat and Suat Gumussoy and Umit Y. Ogras",
  title =        "Power-Temperature Stability and Safety Analysis for
                 Multiprocessor Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "145:1--145:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126567",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modern multiprocessor system-on-chips (SoCs) integrate
                 multiple heterogeneous cores to achieve high energy
                 efficiency. The power consumption of each core
                 contributes to an increase in the temperature across
                 the chip floorplan. In turn, higher temperature
                 increases the leakage power exponentially, and leads to
                 a positive feedback with nonlinear dynamics. This paper
                 presents a power-temperature stability and safety
                 analysis technique for multiprocessor systems. This
                 analysis reveals the conditions under which the
                 power-temperature trajectory converges to a stable
                 fixed point. We also present a simple formula to
                 compute the stable fixed point and maximum
                 thermally-safe power consumption at runtime. Hardware
                 measurements on a state-of-the-art mobile processor
                 show that our analytical formulation can predict the
                 stable fixed point with an average error of 2.6\%.
                 Hence, our approach can be used at runtime to ensure
                 thermally safe operation and guard against thermal
                 threats.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "145",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2017:CEG,
  author =       "Siqi Wang and Guanwen Zhong and Tulika Mitra",
  title =        "{CGPredict}: Embedded {GPU} Performance Estimation
                 from Single-Threaded Applications",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "146:1--146:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126546",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Heterogeneous multiprocessor system-on-chip
                 architectures are endowed with accelerators such as
                 embedded GPUs and FPGAs capable of general-purpose
                 computation. The application developers for such
                 platforms need to carefully choose the accelerator with
                 the maximum performance benefit. For a given
                 application, usually, the reference code is specified
                 in a high-level single-threaded programming language
                 such as C. The performance of an application kernel on
                 an accelerator is a complex interplay among the exposed
                 parallelism, the compiler, and the accelerator
                 architecture. Thus, determining the performance of a
                 kernel requires its redevelopment into each
                 accelerator-specific language, causing substantial
                 wastage of time and effort. To aid the developer in
                 this early design decision, we present an analytical
                 framework CGPredict to predict the performance of a
                 computational kernel on an embedded GPU architecture
                 from un-optimized, single-threaded C code. The
                 analytical approach provides insights on application
                 characteristics which suggest further
                 application-specific optimizations. The estimation
                 error is as low as 2.66\% (average 9\%) compared to the
                 performance of the same kernel written in native CUDA
                 code running on NVIDIA Kepler embedded GPU. This low
                 performance estimation error enables CGPredict to
                 provide an early design recommendation of the
                 accelerator starting from C code.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "146",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Singh:2017:EER,
  author =       "Amit Kumar Singh and Alok Prakash and Karunakar Reddy
                 Basireddy and Geoff V. Merrett and Bashir M.
                 Al-Hashimi",
  title =        "Energy-Efficient Run-Time Mapping and Thread
                 Partitioning of Concurrent {OpenCL} Applications on
                 {CPU--GPU MPSoCs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "147:1--147:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126548",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Heterogeneous Multi-Processor Systems-on-Chips
                 (MPSoCs) containing CPU and GPU cores are typically
                 required to execute applications concurrently. However,
                 as will be shown in this paper, existing approaches are
                 not well suited for concurrent applications as they are
                 developed either by considering only a single
                 application or they do not exploit both CPU and GPU
                 cores at the same time. In this paper, we propose an
                 energy-efficient run-time mapping and thread
                 partitioning approach for executing concurrent OpenCL
                 applications on both CPU and GPU cores while satisfying
                 performance requirements. Depending upon the
                 performance requirements, for each concurrently
                 executing application, the mapping process finds the
                 appropriate number of CPU cores and operating
                 frequencies of CPU and GPU cores, and the partitioning
                 process identifies an efficient partitioning of the
                 applications' threads between CPU and GPU cores. We
                 validate the proposed approach experimentally on the
                 Odroid-XU3 hardware platform with various mixes of
                 applications from the Polybench benchmark suite.
                 Additionally, a case-study is performed with a
                 real-world application SLAMBench. Results show an
                 average energy saving of 32\% compared to existing
                 approaches while still satisfying the performance
                 requirements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "147",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Oneal:2017:GPE,
  author =       "Kenneth O'Neal and Philip Brisk and Ahmed Abousamra
                 and Zack Waters and Emily Shriver",
  title =        "{GPU} Performance Estimation using Software
                 Rasterization and Machine Learning",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "148:1--148:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126557",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper introduces a predictive modeling framework
                 to estimate the performance of GPUs during pre-silicon
                 design. Early-stage performance prediction is useful
                 when simulation times impede development by rendering
                 driver performance validation, API conformance testing
                 and design space explorations infeasible. Our approach
                 builds a Random Forest regression model to analyze
                 DirectX 3D workload behavior when executed by a
                 software rasterizer, which we have extended with a
                 workload characterizer to collect further performance
                 information via program counters. In addition to
                 regression models, this work produces detailed feature
                 rankings which can provide valuable architectural
                 insight, and accurate performance estimates for an
                 Intel integrated Skylake generation GPU. Our models
                 achieve reasonable out-of-sample-error rates of 14\%,
                 with an average simulation speedup of 327x.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "148",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Fezzardi:2017:UEP,
  author =       "Pietro Fezzardi and Marco Lattuada and Fabrizio
                 Ferrandi",
  title =        "Using Efficient Path Profiling to Optimize Memory
                 Consumption of On-Chip Debugging for High-Level
                 Synthesis",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "149:1--149:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126564",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "High-Level Synthesis (HLS) for FPGAs is attracting
                 popularity and is increasingly used to handle complex
                 systems with multiple integrated components. To
                 increase performance and efficiency, HLS flows now
                 adopt several advanced optimization techniques.
                 Aggressive optimizations and system level integration
                 can cause the introduction of bugs that are only
                 observable on-chip. Debugging support for circuits
                 generated with HLS is receiving considerable
                 attention. Among the data that can be collected on chip
                 for debugging, one of the most important is the state
                 of the Finite State Machines (FSM) controlling the
                 components of the circuit. However, this usually
                 requires a large amount of memory to trace the behavior
                 during the execution. This work proposes an approach
                 that takes advantage of the HLS information and of the
                 structure of the FSM to compress control flow traces
                 and to integrate optimized components for on-chip
                 debugging. The generated checkers analyze the FSM
                 execution on-the-fly, automatically notifying when a bug is
                 detected, localizing it and providing data about its
                 cause. The traces are compressed using a software
                 profiling technique, called Efficient Path Profiling
                 (EPP), adapted for the debugging of hardware
                 accelerators generated with HLS. With this technique,
                 the size of the memory used to store control flow
                 traces can be reduced up to 2 orders of magnitude,
                 compared to state-of-the-art.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "149",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Piccolboni:2017:CCH,
  author =       "Luca Piccolboni and Paolo Mantovani and Giuseppe {Di
                 Guglielmo} and Luca P. Carloni",
  title =        "{COSMOS}: Coordination of High-Level Synthesis and
                 Memory Optimization for Hardware Accelerators",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "150:1--150:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126566",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Hardware accelerators are key to the efficiency and
                 performance of system-on-chip (SoC) architectures. With
                 high-level synthesis (HLS), designers can easily obtain
                 several performance-cost trade-off implementations for
                 each component of a complex hardware accelerator.
                 However, navigating this design space in search of the
                 Pareto-optimal implementations at the system level is a
                 hard optimization task. We present COSMOS, an automatic
                 methodology for the design-space exploration (DSE) of
                 complex accelerators, that coordinates both HLS and
                 memory optimization tools in a compositional way.
                 First, thanks to the co-design of datapath and memory,
                 COSMOS produces a large set of Pareto-optimal
                 implementations for each component of the accelerator.
                 Then, COSMOS leverages compositional design techniques
                 to quickly converge to the desired trade-off point
                 between cost and performance at the system level. When
                 applied to the system-level design (SLD) of an
                 accelerator for wide-area motion imagery (WAMI), COSMOS
                 explores the design space as completely as an
                 exhaustive search, but it reduces the number of
                 invocations to the HLS tool by up to $ 14.6 \times $.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "150",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Motamedi:2017:MIR,
  author =       "Mohammad Motamedi and Daniel Fong and Soheil Ghiasi",
  title =        "Machine Intelligence on Resource-Constrained {IoT}
                 Devices: The Case of Thread Granularity Optimization
                 for {CNN} Inference",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "151:1--151:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126555",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Despite their remarkable performance in various
                 machine intelligence tasks, the computational intensity
                 of Convolutional Neural Networks (CNNs) has hindered
                 their widespread utilization in resource-constrained
                 embedded and IoT systems. To address this problem, we
                 present a framework for synthesis of efficient CNN
                 inference software targeting mobile SoC platforms. We
                 argue that thread granularity can substantially impact
                 the performance and energy dissipation of the
                 synthesized inference software, and demonstrate that
                 launching the maximum number of logical threads, often
                 promoted as a guiding principle by GPGPU practitioners,
                 does not result in an efficient implementation for
                 mobile SoCs. We hypothesize that the runtime of a CNN
                 layer on a particular SoC platform can be accurately
                 estimated as a linear function of its computational
                 complexity, which may seem counter-intuitive, as modern
                 mobile SoCs utilize a plethora of heterogeneous
                 architectural features and dynamic resource management
                 policies. Consequently, we develop a principled
                 approach and a data-driven analytical model to optimize
                 granularity of threads during CNN software synthesis.
                 Experimental results with several modern CNNs mapped to
                 a commodity Android smartphone with a Snapdragon SoC
                 show up to 2.37X speedup in application runtime, and up
                 to 1.9X improvement in its energy dissipation compared
                 to existing approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "151",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vougioukas:2017:NFS,
  author =       "Ilias Vougioukas and Andreas Sandberg and Stephan
                 Diestelhorst and Bashir M. Al-Hashimi and Geoff V.
                 Merrett",
  title =        "Nucleus: Finding the Sharing Limit of Heterogeneous
                 Cores",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "152:1--152:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126544",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Heterogeneous multi-processors are designed to bridge
                 the gap between performance and energy efficiency in
                 modern embedded systems. This is achieved by pairing
                 Out-of-Order (OoO) cores, yielding performance through
                 aggressive speculation and latency masking, with
                 In-Order (InO) cores, that preserve energy through
                 simpler design. By leveraging migrations between them,
                 workloads can therefore select the best setting for any
                 given energy/delay envelope. However, migrations
                 introduce execution overheads that can hurt performance
                 if they happen too frequently. Finding the optimal
                 migration frequency is critical to maximize energy
                 savings while maintaining acceptable performance. We
                 develop a simulation methodology that can (1) isolate
                 the hardware effects of migrations from the software,
                 (2) directly compare the performance of different core
                 types, (3) quantify the performance degradation and (4)
                 calculate the cost of migrations for each case. To
                 showcase our methodology we run mibench, a
                 microbenchmark suite, and show that migrations can
                 happen as fast as every 100k instructions with little
                 performance loss. We also show that, contrary to
                 numerous recent studies, hypothetical designs do not
                 need to share all of their internal components to be
                 able to migrate at that frequency. Instead, we propose
                 a feasible system that shares level 2 caches and a
                 translation lookaside buffer that matches performance
                 and efficiency. Our results show that there are phases
                 comprising up to 10\% that a migration to the OoO core
                 leads to performance benefits without any additional
                 energy cost when running on the InO core, and up to 6\%
                 of phases where a migration to the InO core can save
                 energy without affecting performance. When considering
                 a policy that focuses on improving the energy-delay
                 product, results show that on average 66\% of the
                 phases can be migrated to deliver equal or better
                 system operation without having to aggressively share
                 the entire memory system or to revert to migration
                 periods finer than 100k instructions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "152",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Papagiannopoulou:2017:ETE,
  author =       "Dimitra Papagiannopoulou and Andrea Marongiu and Tali
                 Moreshet and Maurice Herlihy and R. Iris Bahar",
  title =        "{Edge-TM}: Exploiting Transactional Memory for Error
                 Tolerance and Energy Efficiency",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "153:1--153:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126556",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Scaling of semiconductor devices has enabled higher
                 levels of integration and performance improvements at
                 the price of making devices more susceptible to the
                 effects of static and dynamic variability. Adding
                 safety margins (guardbands) on the operating frequency
                 or supply voltage prevents timing errors, but has a
                 negative impact on performance and energy consumption.
                 We propose Edge-TM, an adaptive hardware/software error
                 management policy that (i) optimistically scales the
                 voltage beyond the edge of safe operation for better
                 energy savings and (ii) works in combination with a
                 Hardware Transactional Memory (HTM)-based error
                 recovery mechanism. The policy applies dynamic voltage
                 scaling (DVS) (while keeping frequency fixed) based on
                 the feedback provided by HTM, which makes it simple and
                 generally applicable. Experiments on an embedded
                 platform show our technique capable of 57\% energy
                 improvement compared to using voltage guardbands and an
                 extra 21--24\% improvement over existing
                 state-of-the-art error tolerance solutions, at a
                 nominal area and time overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "153",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vogel:2017:EVM,
  author =       "Pirmin Vogel and Andreas Kurth and Johannes Weinbuch
                 and Andrea Marongiu and Luca Benini",
  title =        "Efficient Virtual Memory Sharing via On-Accelerator
                 Page Table Walking in Heterogeneous Embedded {SoCs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "154:1--154:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126560",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Shared virtual memory is key in heterogeneous systems
                 on chip (SoCs) that combine a general-purpose host
                 processor with a many-core accelerator, both for
                 programmability and performance. In contrast to the
                 full-blown, hardware-only solutions predominant in
                 modern high-end systems, lightweight hardware-software
                 co-designs are better suited in the context of more
                 power- and area-constrained embedded systems and
                 provide additional benefits in terms of flexibility and
                 predictability. As a downside, the latter solutions
                 require the host to handle in software synchronization
                 in case of page misses as well as miss handling. This
                 may incur considerable run-time overheads. In this
                 work, we present a novel hardware-software virtual
                 memory management approach for many-core accelerators
                 in heterogeneous embedded SoCs. It exploits an
                 accelerator-side helper thread concept that enables the
                 accelerator to manage its virtual memory hardware
                 autonomously while operating cache-coherently on the
                 page tables of the user-space processes of the host.
                 This greatly reduces overhead with respect to host-side
                 solutions while retaining flexibility. We have
                 validated the design with a set of parameterizable
                 benchmarks and real-world applications covering various
                 application domains. For purely memory-bound kernels,
                 the accelerator performance improves by a factor of 3.8
                 compared with host-based management and lies within
                 50\% of a lower-bound ideal memory management unit.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "154",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Khouzani:2017:DBS,
  author =       "Hoda Aghaei Khouzani and Chengmo Yang",
  title =        "A {DWM}-Based Stack Architecture Implementation for
                 Energy Harvesting Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "155:1--155:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126543",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy harvesting systems tend to use non-volatile
                 processors to conduct computation under intermittent
                 power supplies. While previous implementations of
                 non-volatile processors are based on register
                 architectures, stack architecture, known for its
                 simplicity and small footprint, seems to be a better
                 fit for energy harvesting systems. In this work, Domain
                 Wall Memory (DWM) is used to implement ZPU, the world's
                 smallest working CPU. Not only does DWM offer
                 ultra-high density and SRAM-comparable access latency,
                 but the sequential access structure of DWM also makes
                 it well suited for a stack whose accesses display high
                 temporal locality. As the performance and energy of DWM
                 are determined by the number of shift operations
                 performed to access the stack, this paper further
                 reduces shift operations through novel data placement
                 and micro-code transformation optimizations. The impact
                 of compiler optimization techniques on the number of
                 shift operations is also investigated so as to select
                 the most effective optimizations for DWM-based stack
                 machine. Experimental studies confirm the effectiveness
                 of the proposed DWM-based stack architectures in
                 improving the performance and energy-efficiency of
                 energy harvesting systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "155",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Park:2017:FPC,
  author =       "Jaehyun Park and Hitesh Joshi and Hyung Gyu Lee and
                 Sayfe Kiaei and Umit Y. Ogras",
  title =        "Flexible {PV}-cell Modeling for Energy Harvesting in
                 Wearable {IoT} Applications",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "156:1--156:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126568",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Wearable devices with sensing, processing and
                 communication capabilities have become feasible with
                 the advances in internet-of-things (IoT) and low power
                 design technologies. Energy harvesting is extremely
                 important for wearable IoT devices due to size and
                 weight limitations of batteries. One of the most widely
                 used energy harvesting sources is photovoltaic cell
                 (PV-cell) owing to its simplicity and high output
                 power. In particular, flexible PV-cells offer great
                 potential for wearable applications. This paper models,
                 for the first time, how bending a PV-cell significantly
                 impacts the harvested energy. Furthermore, we derive an
                 analytical model to quantify the harvested energy as a
                 function of the radius of curvature. We validate the
                 proposed model empirically using a commercial PV-cell
                 under a wide range of bending scenarios, light
                 intensities and elevation angles. Finally, we show that
                 the proposed model can accelerate maximum power point
                 tracking algorithms and increase the harvested energy
                 by up to 25.0\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "156",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Andalam:2017:NEM,
  author =       "Sidharta Andalam and Nathan Allen and Avinash Malik
                 and Partha S. Roop and Mark Trew",
  title =        "A Novel Emulation Model of the Cardiac Conduction
                 System",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "157:1--157:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126542",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Models of the cardiac conduction system are usually at
                 two extremes: (1) high fidelity models with excellent
                 precision but lacking a real-time response for
                 emulation (hardware in the loop simulation); or (2)
                 models amenable for emulation, but that do not exhibit
                 appropriate dynamic response, which is necessary for
                 arrhythmia susceptibility. We introduce two
                 abstractions to remedy the situation. The first
                 abstraction is a new cell model, which is a semi-linear
                 hybrid automaton. The proposed model is as
                 computationally efficient as current state-of-the-art
                 cell models amenable for emulation. Yet, unlike these
                 models, it is also able to capture the dynamic response
                 of the cardiac cell like the higher-fidelity models.
                 The second abstraction is the use of smooth-tokens to
                 develop a new path model, connecting cells, which is
                 efficient in terms of memory consumption. Moreover, the
                 memory requirements of the path model can be statically
                 bounded and are invariant to the emulation step size.
                 Results show that the proposed semi-linear abstraction
                 for the cell reduces the execution time by up to 44\%.
                 Furthermore, the smooth-tokens based path model reduces
                 the memory consumption by 40 times when compared to
                 existing path models. This paves the way for the
                 emulation of complex cardiac conduction systems, using
                 hardware code-generators.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "157",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rouhani:2017:RAF,
  author =       "Bita Darvish Rouhani and Azalia Mirhoseini and Farinaz
                 Koushanfar",
  title =        "{RISE}: an Automated Framework for Real-Time
                 Intelligent Video Surveillance on {FPGA}",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "158:1--158:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126549",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper proposes RISE, an automated Reconfigurable
                 framework for real-time background subtraction applied
                 to Intelligent video SurveillancE. RISE is devised with
                 a new streaming-based methodology that adaptively
                 learns/updates a corresponding dictionary matrix from
                 background pixels as new video frames are captured over
                 time. This dictionary is used to highlight the
                 foreground information in each video frame. A key
                 characteristic of RISE is that it adaptively adjusts
                 its dictionary for diverse lighting conditions and
                 varying camera distances by continuously updating the
                 corresponding dictionary. We evaluate RISE on
                 natural-scene vehicle images of different backgrounds
                 and ambient illuminations. To facilitate automation, we
                 provide an accompanying API that can be used to deploy
                 RISE on FPGA-based system-on-chip platforms. We
                 prototype RISE for end-to-end deployment of three
                 widely-adopted image processing tasks used in
                 intelligent transportation systems: License Plate
                 Recognition (LPR), image denoising/reconstruction, and
                 principal component analysis. Our evaluations
                 demonstrate up to 87-fold higher throughput per energy
                 unit compared to the prior-art software solution
                 executed on ARM Cortex-A15 embedded platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "158",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Basu:2017:IUL,
  author =       "Soumya Basu and Loris Duch and Rub{\'e}n Braojos and
                 Giovanni Ansaloni and Laura Pozzi and David Atienza",
  title =        "An Inexact Ultra-low Power Bio-signal Processing
                 Architecture With Lightweight Error Recovery",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "159:1--159:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126565",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The energy efficiency of digital architectures is
                 tightly linked to the voltage level (Vdd) at which they
                 operate. Aggressive voltage scaling is therefore
                 mandatory when ultra-low power processing is required.
                 Nonetheless, the lowest admissible Vdd is often bounded
                 by reliability concerns, especially since static and
                 dynamic non-idealities are exacerbated in the
                 near-threshold region, imposing costly guard-bands to
                 guarantee correctness under worst-case conditions. A
                 striking alternative, explored in this paper, waives
                 the requirement for unconditional correctness,
                 undergoing more relaxed constraints. First, after a
                 run-time failure, processing correctly resumes at a
                 later point in time. Second, failures induce a limited
                 Quality-of-Service (QoS) degradation. We focus our
                 investigation on the practical scenario of embedded
                 bio-signal analysis, a domain in which energy
                 efficiency is key, while applications are inherently
                 error-tolerant to a certain degree. Targeting a
                 domain-specific multi-core platform, we present a study
                 of the impact of inexactness on application-visible
                 errors. Then, we introduce a novel methodology to
                 manage them, which requires minimal hardware resources
                 and a negligible energy overhead. Experimental evidence
                 shows that, by tolerating 900 errors/hour, the resulting
                 inexact platform can achieve an efficiency increase of
                 up to 24\%, with a QoS degradation of less than 3\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "159",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{VanPinxten:2017:OSR,
  author =       "Joost {Van Pinxten} and Umar Waqas and Marc Geilen and
                 Twan Basten and Lou Somers",
  title =        "Online Scheduling of $2$-Re-entrant Flexible
                 Manufacturing Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "160:1--160:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126551",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Online scheduling of operations is essential to
                 optimize productivity of flexible manufacturing systems
                 (FMSs) where manufacturing requests arrive on the fly.
                 An FMS processes products according to a particular
                 flow through processing stations. This work focusses on
                 online scheduling of re-entrant FMSs with flows using
                 processing stations where products pass twice and with
                 limited buffering between processing stations. This
                 kind of FMS is modelled as a re-entrant flow shop with
                 due dates and sequence-dependent set-up times. Such
                 flow shops can benefit from minimization of the time
                 penalties incurred from set-up times. On top of an
                 existing greedy scheduling heuristic we apply a
                 meta-heuristic that simultaneously explores several
                 alternatives considering trade-offs between the used
                 metrics by the scheduling heuristic. We identify
                 invariants to efficiently remove many infeasible
                 scheduling options so that the running time of online
                 implementations is improved. The resulting algorithm is
                 much faster than the state of the art and produces
                 schedules with on average 4.6\% shorter makespan.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "160",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Beckert:2017:RTA,
  author =       "Matthias Beckert and Rolf Ernst",
  title =        "Response Time Analysis for Sporadic Server Based
                 Budget Scheduling in Real Time Virtualization
                 Environments",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "161:1--161:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126559",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "Virtualization techniques for embedded real-time
                 systems typically employ TDMA scheduling to achieve
                 temporal isolation among different virtualized
                 applications. Recent work already introduced sporadic
                 server based solutions relying on budgets instead of a
                 fixed TDMA schedule. While providing better
                 average-case response times for IRQs and tasks, a
                 formal response time analysis for the worst-case is
                 still missing. In order to confirm the advantage of a
                 sporadic server based budget scheduling, this paper
                 provides a worst-case response time analysis. To
                 improve the sporadic server based budget scheduling
                 even more, we provide a background scheduling
                 implementation which will also be covered by the formal
                 analysis. We show correctness of the analysis approach
                 and compare it against TDMA based systems. In addition
                 to that, we provide response time measurements from a
                 working hypervisor implementation on an ARM based
                 development board.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "161",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2017:RTD,
  author =       "Xiaowen Chen and Zhonghai Lu and Sheng Liu and Shuming
                 Chen",
  title =        "Round-trip {DRAM} Access Fairness in {$3$D}
                 {NoC-based} Many-core Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "162:1--162:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126561",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In 3D NoC-based many-core systems, DRAM accesses
                 behave differently due to their different communication
                 distances and the latency gap of different DRAM
                 accesses becomes bigger as the network size increases,
                 which leads to unfair DRAM access performance among
                 different nodes. This phenomenon may lead to high
                 latencies for some DRAM accesses that become the
                 performance bottleneck of the system. The paper
                 addresses the DRAM access fairness problem in 3D
                 NoC-based many-core systems by narrowing the latency
                 difference of DRAM accesses as well as reducing the
                 maximum latency. Firstly, the latency of a round-trip
                 DRAM access is modeled and the factors causing DRAM
                 access latency difference are discussed in detail.
                 Secondly, the DRAM access fairness is further
                 quantitatively analyzed through experiments. Thirdly,
                 we propose to predict the network latency of round-trip
                 DRAM accesses and use the predicted round-trip DRAM
                 access time as the basis to prioritize the DRAM
                 accesses in DRAM interfaces so that the DRAM accesses
                 with potential high latencies can be transferred as
                 early and fast as possible, thus achieving fair DRAM
                 access. Experiments with synthetic and application
                 workloads validate that our approach can achieve fair
                 DRAM access and outperform the traditional
                 First-Come-First-Serve (FCFS) scheduling policy and the
                 scheduling policies proposed by reference [7] and [24]
                 in terms of maximum latency, Latency Standard Deviation
                 (LSD) and speedup. In the experiments, the maximum
                 improvement of the maximum latency, LSD, and speedup
                 are 12.8\%, 6.57\%, and 8.3\% respectively. Besides,
                 our proposal brings very small extra hardware overhead
                 ($<$ 0.6\%) in comparison to the three counterparts.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "162",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2017:MAA,
  author =       "Jaewoo Lee and Hoon Sung Chwa and Linh T. X. Phan and
                 Insik Shin and Insup Lee",
  title =        "{MC-ADAPT}: Adaptive Task Dropping in
                 Mixed-Criticality Scheduling",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "163:1--163:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126498",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent embedded systems are becoming integrated
                 systems with components of different criticality. To
                 tackle this, mixed-criticality systems aim to provide
                 different levels of timing assurance to components of
                 different criticality levels while achieving efficient
                 resource utilization. Many approaches have been
                 proposed to execute more lower-criticality tasks
                 without affecting the timeliness of higher-criticality
                 tasks. Those previous approaches however have at least
                 one of the two limitations: (i) they penalize all
                 lower-criticality tasks at once upon a certain
                 situation, or (ii) they make the decision how to
                 penalize lower-criticality tasks at design time. As a
                 consequence, they under-utilize resources by imposing
                 an excessive penalty on low-criticality tasks. Unlike
                 those existing studies, we present a novel framework,
                 called MC-ADAPT, that aims to minimally penalize
                 lower-criticality tasks by fully reflecting the
                 dynamically changing system behavior into adaptive
                 decision making. Towards this, we propose a new
                 scheduling algorithm and develop its runtime
                 schedulability analysis capable of capturing the
                 dynamic system state. Our proposed algorithm adaptively
                 determines which task to drop based on the runtime
                 analysis. To determine the quality of task dropping
                 solution, we propose the speedup factor for task
                 dropping while the conventional use of the speedup
                 factor only evaluates MC scheduling algorithms in terms
                 of the worst-case schedulability. We apply the speedup
                 factor for a newly-defined task dropping problem that
                 evaluates task dropping solution under different
                 runtime scheduling scenarios. We derive that MC-ADAPT
                 has a speedup factor of 1.619 for task drop. This
                 implies that MC-ADAPT can behave the same as the
                 optimal scheduling algorithm with optimal task dropping
                 strategy does under any runtime scenario if the system
                 is sped up by a factor of 1.619.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "163",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rouxel:2017:TCD,
  author =       "Benjamin Rouxel and Steven Derrien and Isabelle
                 Puaut",
  title =        "Tightening Contention Delays While Scheduling Parallel
                 Applications on Multi-core Architectures",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "164:1--164:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126496",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multi-core systems are increasingly interesting
                 candidates for executing parallel real-time
                 applications, in avionic, space or automotive
                 industries, as they provide both computing capabilities
                 and power efficiency. However, ensuring that timing
                 constraints are met on such platforms is challenging,
                 because some hardware resources are shared between
                 cores. Assuming worst-case contentions when analyzing
                 the schedulability of applications may result in
                 systems mistakenly declared unschedulable, although the
                 worst-case level of contentions can never occur in
                 practice. In this paper, we present two
                 contention-aware scheduling strategies that produce a
                 time-triggered schedule of the application's tasks.
                 Based on knowledge of the application's structure, our
                 scheduling strategies precisely estimate the effective
                 contentions, in order to minimize the overall makespan
                 of the schedule. An Integer Linear Programming (ILP)
                 solution of the scheduling problem is presented, as
                 well as a heuristic solution that generates schedules
                 very close to ones of the ILP (5\% longer on average),
                 with a much lower time complexity. Our heuristic
                 improves by 19\% the overall makespan of the resulting
                 schedules compared to a worst-case contention
                 baseline.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "164",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ahmed:2017:DAT,
  author =       "Rehan Ahmed and Pengcheng Huang and Max Millen and
                 Lothar Thiele",
  title =        "On The Design and Application of Thermal Isolation
                 Servers",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "165:1--165:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126512",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recently, there has been an increasing trend towards
                 executing real-time applications on multi-core
                 platforms. However, this complicates the design
                 problem, as applications running on different cores can
                 interfere due to shared resources and mediums. In this
                 paper, we focus on thermal interference, where a given
                 task ($ \tau_1 $) heats the processor, resulting in
                 reduced service (due to Dynamic Thermal Management
                 (DTM)) to another task ($ \tau_2 $). In real-time
                 domain, where tasks have deadline constraints, thermal
                 interference is a substantial problem as it directly
                 impacts the Worst Case Execution Time (WCET) of the
                 affected application ($ \tau_2 $). The problem
                 exacerbates as we move to mixed-criticality systems,
                 where the criticality of $ \tau_2 $ may be greater than
                 the criticality of $ \tau_1 $, complicating the
                 certification process. In this paper, we propose a
                 server based strategy (Thermal Isolation Server (TI
                 Server)) which can be used to avoid thermal
                 interference of applications. We also present a
                 heuristic to design TI Servers to meet the timing
                 constraints of all tasks and the thermal constraints of
                 the system. TI Servers are time/space composable, and
                 can be applied to a variety of task models. We also
                 evaluate TI Servers on a hardware test-bed for
                 validation purposes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "165",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Allamigeon:2017:FMC,
  author =       "Xavier Allamigeon and St{\'e}phane Gaubert and Eric
                 Goubault and Sylvie Putot and Nikolas Stott",
  title =        "A Fast Method to Compute Disjunctive Quadratic
                 Invariants of Numerical Programs",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "166:1--166:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126502",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We introduce a new method to compute non-convex
                 invariants of numerical programs, which includes the
                 class of switched affine systems with affine guards. We
                 obtain disjunctive and non-convex invariants by
                 associating different partial execution traces with
                 different ellipsoids. A key ingredient is the solution
                 of non-monotone fixed points problems over the space of
                 ellipsoids with a reduction to small size linear matrix
                 inequalities. This allows us to analyze instances that
                 are inaccessible in terms of expressivity or scale by
                 earlier methods based on semi-definite programming.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "166",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schulze:2017:IIM,
  author =       "Christoph Schulze and Rance Cleaveland",
  title =        "Improving Invariant Mining via Static Analysis",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "167:1--167:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126504",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper proposes the use of static analysis to
                 improve the generation of invariants from test data
                 extracted from Simulink models. Previous work has shown
                 the utility of such automatically generated invariants
                 as a means for updating and completing system
                 specifications; they also are useful as a means of
                 understanding model behavior. This work shows how the
                 scalability and accuracy of the data mining process can
                 be dramatically improved by using information from
                 data/control flow analysis to reduce the search space
                 of the invariant mining and to eliminate false
                 positives. Comparative evaluations of the process show
                 that the improvements significantly reduce execution
                 time and memory consumption, thereby supporting the
                 analysis of more complex models, while also improving
                 the accuracy of the generated invariants.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "167",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chaki:2017:FVT,
  author =       "Sagar Chaki and Dionisio {De Niz}",
  title =        "Formal Verification of a Timing Enforcer
                 Implementation",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "168:1--168:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126517",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A timing enforcer is a scheduler that not only
                 allocates CPU cycles to threads, but also uses timers
                 to enforce time budgets. An approach for verifying
                 safety properties of timing enforcers at the source
                 code level is presented. We assume that the enforcer is
                 implemented as a set of ``enforcer'' functions that are
                 executed atomically on critical system-level events,
                 such as the arrival and departure of jobs, and
                 triggering of timers. The key idea is to express the
                 safety property as an invariant, and prove that it is
                 inductive across all the enforcer functions. A formal
                 semantics of timing enforcers is presented, including
                 the semantics of functions used to read the system
                 clock and set timers. Using this semantics, the
                 verification approach is presented, and its soundness
                 proved. Further, the approach also takes into
                 consideration the periodicity of tasks. It is validated
                 by proving the correctness of the enforcement of CPU
                 cycle budgets for tasks by the Zero-Slack Rate
                 Monotonic (ZSRM) scheduler, which is implemented in C
                 as a Linux kernel module. The inductiveness of the
                 necessary ZSRM invariants is proved by expressing them
                 as function contracts using the ACSL specification
                 language, and verifying the contracts using the Frama-C
                 tool.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "168",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mehrabian:2017:TTL,
  author =       "Mohammadreza Mehrabian and Mohammad Khayatian and
                 Aviral Shrivastava and John C. Eidson and Patricia
                 Derler and Hugo A. Andrade and Ya-Shian Li-Baboud and
                 Edward Griffor and Marc Weiss and Kevin Stanton",
  title =        "Timestamp Temporal Logic {(TTL)} for Testing the
                 Timing of Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "169:1--169:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126510",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In order to test the performance and verify the
                 correctness of Cyber-Physical Systems (CPS), the timing
                 constraints on the system behavior must be met. Signal
                 Temporal Logic (STL) can efficiently and succinctly
                 capture the timing constraints of a given system model.
                 However, many timing constraints on CPS are more
                 naturally expressed in terms of events on signals.
                 While it is possible to specify event-based timing
                 constraints in STL, such statements can quickly become
                 long and arcane in even simple systems. Timing
                 constraints for CPS, which can be large and complex
                 systems, are often associated with tolerances, the
                 expression of which can make the timing constraints
                 even more cumbersome using STL. This paper proposes a
                 new logic, Timestamp Temporal Logic (TTL), to provide a
                 definitional extension of STL that more intuitively
                 expresses the timing constraints of distributed CPS.
                 TTL also allows for a more natural expression of timing
                 tolerances. Additionally, this paper outlines a
                 methodology to automatically generate logic code and
                 programs to monitor the expressed timing constraints.
                 Since our TTL monitoring logic evaluates the timing
                 constraints using only the timestamps of the required
                 events on the signal, the TTL monitoring logic has
                 significantly less memory footprint when compared to
                 traditional STL monitoring logic, which stores the
                 signal value at the required sampling frequency. The
                 key contribution of this paper is a scalable approach
                 for online monitoring of the timing constraints. We
                 demonstrate the capabilities of TTL and our methodology
                 for online monitoring of TTL constraints on two case
                 studies: (1) Synchronization and phase control of two
                 generators, and (2) Simultaneous image capture using
                 distributed cameras for 3D image reconstruction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "169",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Deshmukh:2017:TCP,
  author =       "Jyotirmoy Deshmukh and Marko Horvat and Xiaoqing Jin
                 and Rupak Majumdar and Vinayak S. Prabhu",
  title =        "Testing Cyber-Physical Systems through {Bayesian}
                 Optimization",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "170:1--170:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126521",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many problems in the design and analysis of
                 cyber-physical systems (CPS) reduce to the following
                 optimization problem: given a CPS which transforms
                 continuous-time input traces in $ R^m $ to
                 continuous-time output traces in $ R^n $ and a cost
                 function over output traces, find an input trace which
                 minimizes the cost. Cyber-physical systems are
                 typically so complex that solving the optimization
                 problem analytically by examining the system dynamics
                 is not feasible. We consider a black-box approach,
                 where the optimization is performed by testing the
                 input-output behaviour of the CPS. We provide a
                 unified, tool-supported methodology for CPS testing and
                 optimization. Our tool is the first CPS testing tool
                 that supports Bayesian optimization. It is also the
                 first to employ fully automated dimensionality
                 reduction techniques. We demonstrate the potential of
                 our tool by running experiments on multiple industrial
                 case studies. We compare the effectiveness of Bayesian
                 optimization to state-of-the-art testing techniques
                 based on CMA-ES and Simulated Annealing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "170",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sun:2017:WHS,
  author =       "Youcheng Sun and Marco {Di Natale}",
  title =        "Weakly Hard Schedulability Analysis for Fixed Priority
                 Scheduling of Periodic Real-Time Tasks",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "171:1--171:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126497",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The hard deadline model is very popular in real-time
                 research, but is representative or applicable to a
                 small number of systems. Many applications, including
                 control systems, are capable of tolerating occasional
                 deadline misses, but are seriously compromised by a
                 repeating pattern of late terminations. The weakly hard
                 real-time model tries to capture these requirements by
                 analyzing the conditions that guarantee that a maximum
                 number of deadlines can be possibly missed in any set
                 of consecutive activations. We provide a new weakly
                 hard schedulability analysis method that applies to
                 constrained-deadline periodic real-time systems
                 scheduled with fixed priority and without knowledge of
                 the task activation offsets. The analysis is based on a
                 Mixed Integer Linear Programming (MILP) problem
                 formulation; it is very general and can be adapted to
                 include the consideration of resource sharing and
                 activation jitter. A set of experiments conducted on an
                 automotive engine control application and randomly
                 generated tasksets show the applicability and accuracy
                 of the proposed technique.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "171",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schlatow:2017:RTA,
  author =       "Johannes Schlatow and Rolf Ernst",
  title =        "Response-Time Analysis for Task Chains with Complex
                 Precedence and Blocking Relations",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "172:1--172:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126505",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "For the development of complex software systems, we
                 often resort to component-based approaches that
                 separate the different concerns, enhance verifiability
                 and reusability, and for which microkernel-based
                 implementations are a good fit to enforce these
                 concepts. Composing such a system of several
                 interacting software components will, however, lead to
                 complex precedence and blocking relations, which must
                 be taken into account when performing latency analysis.
                 When modelling these systems by classical task graphs,
                 some of these effects are obfuscated and tend to render
                 such an analysis either overly pessimistic or even
                 optimistic. We therefore firstly present a novel task
                 (meta-)model that is more expressive and accurate
                 w.r.t. these (functional) precedence and mutual
                 blocking relations. Secondly, we apply the busy-window
                 approach and formulate a modular response-time analysis
                 on task-chain level suitable but not restricted to
                 static-priority scheduled systems. We show that the
                 conjunction of both concepts allows the calculation of
                 reasonably tight latency bounds for scenarios not
                 adequately covered by related work.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "172",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kurtin:2017:ART,
  author =       "Philip S. Kurtin and Marco J. G. Bekooij",
  title =        "An Abstraction-Refinement Theory for the Analysis and
                 Design of Real-Time Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "173:1--173:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126507",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Component-based and model-based reasonings are key
                 concepts to address the increasing complexity of
                 real-time systems. Bounding abstraction theories allow
                 to create efficiently analyzable models that can be
                 used to give temporal or functional guarantees on
                 non-deterministic and non-monotone implementations.
                 Likewise, bounding refinement theories allow to create
                 implementations that adhere to temporal or functional
                 properties of specification models. For systems in
                 which jitter plays a major role, both best-case and
                 worst-case bounding models are needed. In this paper we
                 present a bounding abstraction-refinement theory for
                 real-time systems. Compared to the state-of-the-art
                 TETB refinement theory, our theory is less restrictive
                 with respect to the automatic lifting of properties
                 from component to graph level and does not only support
                 temporal worst-case refinement, but evenhandedly
                 temporal and functional, best-case and worst-case
                 abstraction and refinement.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "173",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Azimi:2017:HHF,
  author =       "Iman Azimi and Arman Anzanpour and Amir M. Rahmani and
                 Tapio Pahikkala and Marco Levorato and Pasi Liljeberg
                 and Nikil Dutt",
  title =        "{HiCH}: Hierarchical Fog-Assisted Computing
                 Architecture for Healthcare {IoT}",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "174:1--174:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126501",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The Internet of Things (IoT) paradigm holds
                 significant promises for remote health monitoring
                 systems. Due to their life- or mission-critical nature,
                 these systems need to provide a high level of
                 availability and accuracy. On the one hand, centralized
                 cloud-based IoT systems lack reliability, punctuality
                 and availability (e.g., in case of slow or unreliable
                 Internet connection), and on the other hand, fully
                 outsourcing data analytics to the edge of the network
                 can result in diminished level of accuracy and
                 adaptability due to the limited computational capacity
                 in edge nodes. In this paper, we tackle these issues by
                 proposing a hierarchical computing architecture, HiCH,
                 for IoT-based health monitoring systems. The core
                 components of the proposed system are (1) a novel
                 computing architecture suitable for hierarchical
                 partitioning and execution of machine learning based
                 data analytics, (2) a closed-loop management technique
                 capable of autonomous system adjustments with respect
                 to patient's condition. HiCH benefits from the features
                 offered by both fog and cloud computing and introduces
                 a tailored management methodology for healthcare IoT
                 systems. We demonstrate the efficacy of HiCH via a
                 comprehensive performance assessment and evaluation on
                 a continuous remote health monitoring case study
                 focusing on arrhythmia detection for patients suffering
                 from CardioVascular Diseases (CVDs).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "174",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhao:2017:ORT,
  author =       "Yecheng Zhao and Chao Peng and Haibo Zeng and Zonghua
                 Gu",
  title =        "Optimization of Real-Time Software Implementing
                 Multi-Rate Synchronous Finite State Machines",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "175:1--175:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126515",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Model-based design using Synchronous Reactive (SR)
                 models is becoming widespread for control software
                 development in industry. However, software synthesis is
                 challenging for multi-rate SR models consisting of
                 blocks modeled with finite state machines, due to the
                 complexity of validating the system's real-time
                 schedulability. The existing approach uses the
                 simplified periodic task model to allow an efficient
                 schedulability analysis, which leads to pessimistic and
                 suboptimal solutions. Instead, in this paper, we adopt
                 a more accurate but more complex schedulability
                 analysis. We develop several optimization techniques to
                 improve the algorithm's efficiency. Experimental
                 results on synthetic systems and an industrial case
                 study show that the proposed optimization framework
                 preserves the solution optimality but is much faster
                 (e.g., $ 1000 \times $ for systems with 15 blocks) than
                 the branch-and-bound algorithm, and it generates better
                 control software than the existing approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "175",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bourke:2017:SLS,
  author =       "Timothy Bourke and Fran{\c{c}}ois Carcenac and Jean-Louis
                 Cola{\c{c}}o and Bruno Pagano and C{\'e}dric Pasteur
                 and Marc Pouzet",
  title =        "A Synchronous Look at the {Simulink} Standard
                 Library",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "176:1--176:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126516",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/matlab.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Hybrid systems modelers like Simulink come with a rich
                 collection of discrete-time and continuous-time blocks.
                 Most blocks are not defined in terms of more elementary
                 ones---and some cannot be---but are instead written in
                 imperative code and explained informally in a reference
                 manual. This raises the question of defining a minimal
                 set of orthogonal programming constructs such that most
                 blocks can be programmed directly and thereby given a
                 specification that is mathematically precise, and whose
                 compiled version performs comparably to handwritten
                 code. In this paper, we show that a fairly large set of
                 blocks of a standard library like the one provided by
                 Simulink can be programmed in a precise, purely
                 functional language using stream equations,
                 hierarchical automata, Ordinary Differential Equations
                 (ODEs), and deterministic synchronous parallel
                 composition. Some blocks cannot be expressed in our
                 setting as they mix discrete-time and continuous-time
                 signals in unprincipled ways that are statically
                 forbidden by the type checker. The experiment is
                 conducted in Z{\'e}lus, a synchronous language that
                 conservatively extends Lustre with ODEs to program
                 systems that mix discrete-time and continuous-time
                 signals.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "176",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2017:TAS,
  author =       "Jiajie Wang and Michael Mendler and Partha Roop and
                 Bruno Bodin",
  title =        "Timing Analysis of Synchronous Programs using {WCRT}
                 Algebra: Scalability through Abstraction",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "177:1--177:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126520",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Synchronous languages are ideal for designing
                 safety-critical systems. Static Worst-Case Reaction
                 Time (WCRT) analysis is an essential component in the
                 design flow that ensures the real-time requirements are
                 met. There are a few approaches for WCRT analysis, and
                 the most versatile of all is explicit path enumeration.
                 However, as synchronous programs are highly concurrent,
                 techniques based on this approach, such as model
                 checking, suffer from state explosion as the number of
                 threads increases. One observation on this problem is
                 that these existing techniques analyse the program by
                 enumerating a functionally equivalent automaton while
                 WCRT is a non-functional property. This mismatch
                 potentially causes algorithm-induced state explosion.
                 In this paper, we propose a WCRT analysis technique
                 based on the notion of timing equivalence, expressed
                 using WCRT algebra. WCRT algebra can effectively
                 capture the timing behaviour of a synchronous program
                 by converting its intermediate representation Timed
                 Concurrent Control Flow Graph (TCCFG) into a Tick Cost
                 Automaton (TCA), a minimal automaton that is timing
                 equivalent to the original program. Then the WCRT is
                 computed over the TCA. We have implemented our approach
                 and benchmarked it against state-of-the-art WCRT
                 analysis techniques. The results show that the WCRT
                 algebra is 3.5 times faster on average than the fastest
                 published technique.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "177",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pinisetty:2017:REC,
  author =       "Srinivas Pinisetty and Partha S. Roop and Steven Smyth
                 and Nathan Allen and Stavros Tripakis and Reinhard von
                 Hanxleden",
  title =        "Runtime Enforcement of Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "178:1--178:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126500",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many implantable medical devices, such as pacemakers,
                 have been recalled due to failure of their embedded
                 software. This motivates rethinking their design and
                 certification processes. We propose, for the first
                 time, an additional layer of safety by formalising the
                 problem of run-time enforcement of implantable
                 pacemakers. While recent work has formalised run-time
                 enforcement of reactive systems, the proposed framework
                 generalises existing work along the following
                 directions: (1) we develop bi-directional enforcement,
                 where the enforced policies depend not only on the
                 status of the pacemaker (the controller) but also of
                 the heart (the plant), thus formalising the run-time
                 enforcement problem for cyber-physical systems, (2) we
                 express policies using a variant of discrete timed
                 automata (DTA), which can cover all regular properties
                 unlike earlier frameworks limited to safety properties,
                 (3) we are able to ensure the timing safety of
                 implantable devices through the proposed enforcement,
                 and (4) we show that the DTA-based approach is
                 efficient relative to its dense time variant while
                 ensuring that the discretisation error is relatively
                 small and bounded. The developed approach is validated
                 through a prototype system implemented using the open
                 source KIELER framework. The experiments show that the
                 framework incurs minimal runtime overhead.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "178",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2017:BEB,
  author =       "Qingrui Liu and Xiaolong Wu and Larry Kittinger and
                 Markus Levy and Changhee Jung",
  title =        "{BenchPrime}: Effective Building of a Hybrid Benchmark
                 Suite",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "179:1--179:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126499",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper presents BenchPrime, an automated benchmark
                 analysis toolset that is systematic and extensible to
                 analyze the similarity and diversity of benchmark
                 suites. BenchPrime takes multiple benchmark suites and
                 their evaluation metrics as inputs and generates a
                 hybrid benchmark suite comprising only essential
                 applications. Unlike prior work, BenchPrime uses linear
                 discriminant analysis rather than principal component
                 analysis, as well as selects the best clustering
                 algorithm and the optimized number of clusters in an
                 automated and metric-tailored way, thereby achieving
                 high accuracy. In addition, BenchPrime ranks the
                 benchmark suites in terms of their application set
                 diversity and estimates how unique each benchmark suite
                 is compared to other suites. As a case study, this work
                 for the first time compares the DenBench with the
                 MediaBench and MiBench using four different metrics to
                 provide a multi-dimensional understanding of the
                 benchmark suites. For each metric, BenchPrime measures
                 to what degree DenBench applications are irreplaceable
                 with those in MediaBench and MiBench. This provides
                 means for identifying an essential subset from the
                 three benchmark suites without compromising the
                 application balance of the full set. The experimental
                 results show that the necessity of including DenBench
                 applications varies across the target metrics and that
                 significant redundancy exists among the three benchmark
                 suites.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "179",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schuster:2017:DSE,
  author =       "Simon Schuster and Peter Ulbrich and Isabella
                 Stilkerich and Christian Dietrich and Wolfgang
                 Schr{\"o}der-Preikschat",
  title =        "Demystifying Soft-Error Mitigation by Control-Flow
                 Checking --- A New Perspective on its Effectiveness",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "180:1--180:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126503",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Soft errors are a challenging and urging problem in
                 the domain of safety-critical embedded systems. For
                 decades, checking schemes have been investigated and
                 improved to mitigate soft-error effects for the class
                 of control-flow faults, with current industrial
                 standards strongly recommending their use. However,
                 reality looks different: Taking a systems perspective,
                 we implemented four representative Control-Flow
                 Checking (CFC) schemes and put them through their paces
                 in 396 fault-injection campaigns. In contrast to
                 previous work, which typically relied on
                 probability-based vulnerability metrics, we accounted
                 for the influence of memory and time overheads on the
                 fault-space dimensions and applied those in full-scan
                 fault injections. This change in procedure alone
                 severely degraded the perceived effectiveness of CFC.
                 In addition, we expanded the perspective to data-flow
                 faults and their influence on the overall
                 susceptibility, an aspect that so far has been largely
                 ignored. Our results suggest that, without accompanying
                 measures, any improvement regarding control-flow faults
                 is dominated by the increase in data faults caused by
                 the increased attack surface in terms of memory and
                 runtime overhead. Moreover, CFC performance less
                 depended on the detection capabilities than on general
                 aspects of the concrete binary compilation and
                 execution. In conclusion, incorporating CFC is not as
                 straightforward as often assumed and the vulnerability
                 of systems with hardened control-flow may in many cases
                 even be increased by the schemes themselves.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "180",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shresthamali:2017:APM,
  author =       "Shaswot Shresthamali and Masaaki Kondo and Hiroshi
                 Nakamura",
  title =        "Adaptive Power Management in Solar Energy Harvesting
                 Sensor Node Using Reinforcement Learning",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "181:1--181:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126495",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we present an adaptive power manager
                 for solar energy harvesting sensor nodes. We use a
                 simplified model consisting of a solar panel, an ideal
                 battery and a general sensor node with variable duty
                 cycle. Our power manager uses Reinforcement Learning
                 (RL), specifically SARSA($ \lambda $) learning, to
                 train itself from historical data. Once trained, we
                 show that our power manager is capable of adapting to
                 changes in weather, climate, device parameters and
                 battery degradation while ensuring near-optimal
                 performance without depleting or overcharging its
                 battery. Our approach uses a simple but novel general
                 reward function and leverages the use of weather
                 forecast data to enhance performance. We show that our
                 method achieves near perfect energy neutral operation
                 (ENO) with less than 6\% root mean square deviation
                 from ENO as compared to more than 23\% deviation that
                 occur when using other approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "181",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2017:AAS,
  author =       "Sang-Hoon Kim and Jinkyu Jeong and Jin-Soo Kim",
  title =        "Application-Aware Swapping for Mobile Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "182:1--182:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126509",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "There has been a constant demand for memory in modern
                 mobile systems to provide users with better experience.
                 Swapping is one of the cost-effective software
                 solutions to provide extra usable memory by reclaiming
                 inactive pages and improving memory utilization.
                 However, swapping has not been actively adopted to
                 mobile systems since it incurs a significant amount of
                 I/O, which in fact impairs system performance as well
                 as user experience. In this paper, we propose a novel
                 scheme to properly harness the swapping to mobile
                 systems. We identify that a vast amount of I/O for
                 swapping comes from the conflict of the traditional
                 page-level approach of the swapping and the
                 process-level memory management scheme tailored to
                 mobile systems. Moreover, we find out that the current
                 victim page selection policy is not effective due to
                 the process-level policy. To address these problems, we
                 revise the victim selection policy to resolve the
                 conflict and to selectively perform swapping according
                 to the efficacy of swapping. Evaluation using a running
                 prototype with realistic workloads indicates that the
                 proposed scheme effectively reduces the paging traffic,
                 thereby improving user experience as well as energy
                 consumption.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "182",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ji:2017:LDC,
  author =       "Cheng Ji and Li-Pin Chang and Liang Shi and Congming
                 Gao and Chao Wu and Yuangang Wang and Chun Jason Xue",
  title =        "Lightweight Data Compression for Mobile Flash
                 Storage",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "183:1--183:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126511",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Data compression is beneficial to flash storage
                 lifespan. However, because the design of mobile flash
                 storage is highly cost-sensitive, hardware compression
                 becomes a less attractive option. This study
                 investigates the feasibility of data compression on
                 mobile flash storage. It first characterizes data
                 compressibility based on mobile apps, and the analysis
                 shows that write traffic bound for mobile storage
                 volumes is highly compressible. Based on this finding,
                 a lightweight approach is introduced for firmware-based
                 data compression in mobile flash storage. The
                 controller and flash module work in a pipelined fashion
                 to hide the data compression overhead. Together with
                 this pipelined design, the proposed approach
                 selectively compresses incoming data of high
                 compressibility, while leaving data of low
                 compressibility to a compression-aware garbage
                 collector. Experimental results show that our approach
                 greatly reduced the frequency of block erase by 50.5\%
                 compared to uncompressed flash storage. Compared to
                 unconditional data compression, our approach improved
                 the write latency by 10.4\% at a marginal cost of 4\%
                 more block erase operations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "183",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Medhat:2017:MPE,
  author =       "Ramy Medhat and Michael O. Lam and Barry L. Rountree
                 and Borzoo Bonakdarpour and Sebastian Fischmeister",
  title =        "Managing the Performance\slash Error Tradeoff of
                 Floating-point Intensive Applications",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "184:1--184:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126519",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modern embedded systems are becoming more reliant on
                 real-valued arithmetic as they employ mathematically
                 complex vision algorithms and sensor signal processing.
                 Double-precision floating point is the most commonly
                 used precision in computer vision algorithm
                 implementations. A single-precision floating point can
                 provide a performance boost due to less memory
                 transfers, less cache occupancy, and relatively faster
                 mathematical operations on some architectures. However,
                 adopting it can result in loss of accuracy. Identifying
                 which parts of the program can run in single-precision
                 floating point with low impact on error is a manual and
                 tedious process. In this paper, we propose an automatic
                 approach to identify parts of the program that have a
                 low impact on error using shadow-value analysis. Our
                 approach provides the user with a performance/error
                 tradeoff, using which the user can decide how much
                 accuracy can be sacrificed in return for performance
                 improvement. We illustrate the impact of the approach
                 using a well known implementation of Apriltag detection
                 used in robotics vision. We demonstrate that an average
                 1.3x speedup can be achieved with no impact on tag
                 detection, and a 1.7x speedup with only 4\% false
                 negatives.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "184",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sogokon:2017:OMP,
  author =       "Andrew Sogokon and Khalil Ghorbal and Taylor T.
                 Johnson",
  title =        "Operational Models for Piecewise-Smooth Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "185:1--185:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126506",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article we study ways of constructing
                 meaningful operational models of piecewise-smooth
                 systems (PWS). The systems we consider are described by
                 polynomial vector fields defined on non-overlapping
                 semi-algebraic sets, which form a partition of the
                 state space. Our approach is to give meaning to motion
                 in systems of this type by automatically synthesizing
                 operational models in the form of hybrid automata (HA).
                 Despite appearances, it is in practice often difficult
                 to arrive at satisfactory HA models of PWS. The
                 different ways of building operational models that we
                 explore in our approach can be thought of as defining
                 different semantics for the underlying PWS. These
                 differences have a number of interesting nuances
                 related to phenomena such as chattering,
                 non-determinism, so-called mythical modes and sliding
                 behaviour.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "185",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2017:PSV,
  author =       "Chao Huang and Xin Chen and Wang Lin and Zhengfeng
                 Yang and Xuandong Li",
  title =        "Probabilistic Safety Verification of Stochastic Hybrid
                 Systems Using Barrier Certificates",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "186:1--186:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126508",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The problem of probabilistic safety verification of
                 stochastic hybrid systems is to check whether the
                 probability that a given system will reach an unsafe
                 region from certain initial states can be bounded by
                 some given probability threshold. The paper considers
                 stochastic hybrid systems where the behavior is
                 governed by polynomial equalities and inequalities, as
                 for usual hybrid systems, but the initial states follow
                 some stochastic distributions. It proposes a new
                 barrier certificate based method for probabilistic
                 safety verification which guarantees the absolute
                 safety in an infinite time horizon that is beyond the
                 reach of existing techniques using either statistical
                 model checking or probabilistic reachable set
                 computation. It also gives a novel computational
                 approach, by building and solving a constrained
                 optimization problem coming from verification
                 conditions of barrier certificates, to compute the
                 lower bound on safety probabilities which can be
                 compared with the given threshold. Experimental
                 evidence is provided demonstrating the applicability of
                 our approach on several benchmarks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "186",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2017:CRA,
  author =       "Xin Chen and Sergio Mover and Sriram
                 Sankaranarayanan",
  title =        "Compositional Relational Abstraction for Nonlinear
                 Hybrid Systems",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "187:1--187:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126522",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We propose techniques to construct abstractions for
                 nonlinear dynamics in terms of relations expressed in
                 linear arithmetic. Such relations are useful for
                 translating the closed loop verification problem of
                 control software with continuous-time, nonlinear plant
                 models into discrete and linear models that can be
                 handled by efficient software verification approaches
                 for discrete-time systems. We construct relations using
                 Taylor model based flowpipe construction and the
                 systematic composition of relational abstractions for
                 smaller components. We focus on developing efficient
                 schemes for the special case of composing abstractions
                 for linear and nonlinear components. We implement our
                 ideas using a relational abstraction system, using the
                 resulting abstraction inside the verification tool
                 NuXMV, which implements numerous SAT/SMT solver-based
                 verification techniques for discrete systems. Finally,
                 we evaluate the application of relational abstractions
                 for verifying properties of time triggered controllers,
                 comparing with the Flow* tool. We conclude that
                 relational abstractions are a promising approach
                 towards nonlinear hybrid system verification, capable
                 of proving properties that are beyond the reach of
                 tools such as Flow*. At the same time, we highlight the
                 need for improvements to existing linear arithmetic
                 SAT/SMT solvers to better support reasoning with large
                 relational abstractions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "187",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lesi:2017:SAS,
  author =       "Vuk Lesi and Ilija Jovanov and Miroslav Pajic",
  title =        "Security-Aware Scheduling of Embedded Control Tasks",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "188:1--188:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126518",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this work, we focus on securing cyber-physical
                 systems (CPS) in the presence of network-based attacks,
                 such as Man-in-the-Middle (MitM) attacks, where a
                 stealthy attacker is able to compromise communication
                 between system sensors and controllers. Standard
                 methods for this type of attacks rely on the use of
                 cryptographic mechanisms, such as Message
                 Authentication Codes (MACs) to ensure data integrity.
                 However, this approach incurs significant computation
                 overhead, limiting its use in resource constrained
                 systems. Consequently, we consider the problem of
                 scheduling multiple control tasks on a shared processor
                 while providing a suitable level of security
                 guarantees. Specifically, by security guarantees we
                 refer to control performance, i.e., Quality-of-Control
                 (QoC), in the presence of attacks. We start by mapping
                 requirements for QoC under attack into constraints for
                 security-aware control tasks that, besides standard
                 control operations, intermittently perform data
                 authentication. This allows for the analysis of the
                 impact that security-related computation overhead has
                 on both schedulability of control tasks and QoC.
                 Building on this analysis, we introduce a mixed-integer
                 linear programming-based technique to obtain a
                 schedulable task set with predefined QoC requirements.
                 Also, to facilitate optimal resource allocation, we
                 provide a method to analyze interplay between available
                 computational resources and the overall QoC under
                 attack, and show how to obtain a schedulable task set
                 that maximizes the overall QoC guarantees. Finally, we
                 prove usability of our approach on a case study with
                 multiple automotive control components.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "188",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ghosh:2017:SMP,
  author =       "Sumana Ghosh and Souradeep Dutta and Soumyajit Dey and
                 Pallab Dasgupta",
  title =        "A Structured Methodology for Pattern based Adaptive
                 Scheduling in Embedded Control",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "189:1--189:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126514",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Software implementations of multiple embedded control
                 loops often share compute resources. The control
                 performance of such implementations has been shown to
                 improve if the sharing of bandwidth between control
                 loops can be dynamically regulated in response to input
                 disturbances. In the absence of a structured
                 methodology for planning such measures, the scheduler
                 may spend too much time in deciding the optimal
                 scheduling pattern. Our work leverages well known
                 results in the domain of network control systems and
                 applies them in the context of bandwidth sharing among
                 controllers. We provide techniques that may be used a
                 priori for computing co-schedulable execution patterns
                 for a given set of control loops such that stability is
                 guaranteed under all possible disturbance scenarios.
                 Additionally, the design of the control loops optimizes
                 the average case control performance by adaptive
                 sharing of bandwidth under time varying input
                 disturbances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "189",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gavran:2017:AMR,
  author =       "Ivan Gavran and Rupak Majumdar and Indranil Saha",
  title =        "{Antlab}: a Multi-Robot Task Server",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "190:1--190:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126513",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present Antlab, an end-to-end system that takes
                 streams of user task requests and executes them using
                 collections of robots. In Antlab, each request is
                 specified declaratively in linear temporal logic
                 extended with quantifiers over robots. The user does
                 not program robots individually, nor know how many
                 robots are available at any time or the precise state
                 of the robots. The Antlab runtime system manages the
                 set of robots, schedules robots to perform tasks,
                 automatically synthesizes robot motion plans from the
                 task specification, and manages the co-ordinated
                 execution of the plan. We provide a constraint-based
                 formulation for simultaneous task assignment and plan
                 generation for multiple robots working together to
                 satisfy a task specification. In order to scalably
                 handle multiple concurrent tasks, we take a separation
                 of concerns view to plan generation. First, we solve
                 each planning problem in isolation, with an ``ideal
                 world'' hypothesis that says there are no unspecified
                 dynamic obstacles or adversarial environment actions.
                 Second, to deal with imprecisions of the real world, we
                 implement the plans in receding horizon fashion on top
                 of a standard robot navigation stack. The motion
                 planner dynamically detects environment actions or
                 dynamic obstacles from the environment or from other
                 robots and locally corrects the ideal planned path. It
                 triggers a re-planning step dynamically if the current
                 path deviates from the planned path or if planner
                 assumptions are violated. We have implemented Antlab as
                 a C++ and Python library on top of robots running on
                 ROS, using SMT-based and AI planning-based
                 implementations for task and path planning. We
                 evaluated Antlab both in simulation as well as on a set
                 of TurtleBot robots. We demonstrate that it can provide
                 a scalable and robust infrastructure for declarative
                 multi-robot programming.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "190",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2018:ETS,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Trust and Security Must Become a Primary
                 Design Concern in Embedded Computing",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3173385",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2018:GEA,
  author =       "Jiming Chen and Yu (Jason) Gu and Gil Zussman",
  title =        "Guest Editorial for {ACM TECS}: Special Issue on
                 Autonomous Battery-Free Sensing and Communication",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3127494",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2018:HEW,
  author =       "Qi Chen and Ye Liu and Guangchi Liu and Qing Yang and
                 Xianming Shi and Hongwei Gao and Lu Su and Quanlong
                 Li",
  title =        "Harvest Energy from the Water: a Self-Sustained
                 Wireless Water Quality Sensing System",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047646",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Water quality data is incredibly important and
                 valuable, but its acquisition is not always trivial. A
                 promising solution is to distribute a wireless sensor
                 network in water to measure and collect the data;
                 however, a drawback exists in that the batteries of the
                 system must be replaced or recharged after being
                 exhausted. To mitigate this issue, we designed a
                 self-sustained water quality sensing system that is
                 powered by renewable bioenergy generated from microbial
                 fuel cells (MFCs). MFCs collect the energy released
                 from native magnesium oxidizing microorganisms (MOMs)
                 that are abundant in natural waters. The proposed
                 energy-harvesting technology is environmentally
                 friendly and can provide maintenance-free power to
                 sensors for several years. Despite these benefits, an
                 MFC can only provide microwatt-level power that is not
                 sufficient to continuously power a sensor. To address
                 this issue, we designed a power management module to
                 accumulate energy when the input voltage is as low as
                 0.33V. We also proposed a radio-frequency (RF)
                 activation technique to remotely activate sensors that
                 otherwise are switched off in default. With this
                 innovative technique, a sensor's energy consumption in
                 sleep mode can be completely avoided. Additionally,
                 this design can enable on-demand data acquisitions from
                 sensors. We implement the proposed system and evaluate
                 its performance in a stream. In 3-month field
                 experiments, we find the system is able to reliably
                 collect water quality data and is robust to environment
                 changes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gomez:2018:ELT,
  author =       "Andres Gomez and Lukas Sigrist and Thomas Schalch and
                 Luca Benini and Lothar Thiele",
  title =        "Efficient, Long-Term Logging of Rich Data Sensors
                 Using Transient Sensor Nodes",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047499",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "While energy harvesting is generally seen to be the
                 key to power cyber-physical systems in a low-cost,
                 long-term, efficient manner, it has generally required
                 large energy storage devices to mitigate the effects of
                 the source's variability. The emerging class of
                 transiently powered systems embraces this variability by
                 performing computation in proportion to the energy
                 harvested, thereby minimizing the obtrusive and
                 expensive storage element. By using an efficient Energy
                 Management Unit (EMU), small bursts of energy can be
                 buffered in an optimally sized capacitor and used to
                 supply generic loads, even when the average harvested
                 power is only a fraction of that required for sustained
                 system operation. Dynamic Energy Burst Scaling (DEBS)
                 can be used by the load to dynamically configure the
                 EMU to supply small bursts of energy at its optimal
                 power point, independent from the harvester's operating
                 point. Parameters like the maximum burst size, the
                 solar panel's area, as well as the use of
                 energy-efficient Non-Volatile Memory Hierarchy (NVMH)
                 can have a significant impact on the transient system's
                 characteristics such as the wake-up time and the amount
                 of work that can be done per unit of energy.
                 Experimental data from a solar-powered, long-term
                 autonomous image acquisition application show that,
                 regardless of its configuration, the EMU can supply
                 energy bursts to a 43.4mW load with efficiencies of up
                 to 79.7\% and can work with input power levels as low
                 as 140 $ \mu $W. When the EMU is configured to use DEBS
                 and NVMH, the total energy cost of acquiring,
                 processing and storing an image can be reduced by
                 77.8\%, at the price of increasing the energy buffer
                 size by 65\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2018:TAB,
  author =       "Zejue Wang and Hongjia Li and Dan Hu and Song Ci",
  title =        "Transmission Adaptation for Battery-Free Relaying",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3055513",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy harvesting (EH)-enabled relaying has attracted
                 considerable attention as an effective way to prolong
                 the operation time of energy-constrained networks and
                 extend coverage beside desired survivability and rate
                 of transmission. In related literature, the
                 Harvest-Store-Use (HSU) model is usually utilized to
                 describe the energy flow behavior of the EH system.
                 However, the half-duplex (HD) constraint of HSU that
                 harvested energy can only be used after being
                 temporally stored in energy buffer may reduce effective
                 transmission time. Thus, we first construct the
                 full-duplex (FD) energy flow behavior model of the EH
                 system where the harvested energy can be tuned to power
                 load and being stored simultaneously. The FD model is
                 then proved to be equivalent with the HSU model when
                 time interval is small enough. Considering some key
                 physical variabilities, for example, the wireless
                 channel and the amount of harvested energy, the
                 transmission adaptation problem for multiple relays
                 embedded with FD EH systems is formulated with the
                 objective to improve the utilization of the harvested
                 energy. We tackle the problem by using a centralized
                 optimization algorithm by jointly tuning the factors,
                 including power control for source and relay nodes,
                 relay selection and dynamic switching among four relay
                 transmission modes, namely HD amplify-and-forward (AF),
                 HD decode-and-forward (DF), FD AF, and FD DF. The
                 centralized optimization algorithm is proposed on the
                 basis of dual decomposition and serves as a benchmark.
                 To enable relays to individually make their own
                 decisions, a distributed algorithm with relatively
                 higher complexity is given by using consensus
                 optimization in conjunction with the alternating
                 direction method of multipliers, and a sub-optimal
                 algorithm with low complexity is provided. The proposed
                 algorithms are shown to have good performance via
                 simulations for a range of different EH rates and
                 prediction errors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2018:STW,
  author =       "Zhongqin Wang and Fu Xiao and Ning Ye and Ruchuan Wang
                 and Panlong Yang",
  title =        "A See-through-Wall System for Device-Free Human Motion
                 Sensing Based on Battery-Free {RFID}",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3055515",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A see-through-wall system can be used in life
                 detection, military fields, elderly people
                 surveillance, and gaming. The existing systems are
                 mainly based on military devices, customized signals or
                 pre-deployed sensors inside the room, which are very
                 expensive and inaccessible for general use. Recently, a
                 low-cost RFID technology has gained a lot of attention
                 in this field. Since phase estimates of a battery-free
                 RFID tag collected by a commercial off-the-shelf (COTS)
                 RFID reader are sensitive to external interference, the
                 RFID tag could be regarded as a battery-free sensor
                 that detects reflections off targeted objects. The
                 existing RFID-based system, however, needs to first
                 learn the environment of the empty room beforehand to
                 separate reflections off the tracked target. Besides,
                 it can only track low-speed metal objects with
                 high-positioning accuracy. Since the human body with
                 its complex surface has a weaker ability to reflect
                 radio frequency (RF) signals than metal objects, a
                 battery-free RFID tag can capture only a subset of the
                 reflections off the human body. To address these
                 challenges, a RFID-based human motion sensing
                 technology, called RF-HMS, is presented to track
                 device-free human motion through walls. At first, we
                 construct transfer functions of multipath channel based
                 on phase and RSSI measurements to eliminate device
                 noise and reflections off static objects like walls and
                 furniture without learning the environment of the empty
                 room before. Then a tag planar array is grouped by many
                 battery-free RFID tags to improve the sensing
                 performance. RF-HMS combines reflections from each RFID
                 tag into a reinforced result. On this basis, we extract
                 phase shifts to detect the absence or presence of any
                 moving persons and further derive the reflections off a
                 single moving person to identify his/her forward or
                 backward motion direction. The results show that RF-HMS
                 can effectively detect the absence or presence of
                 moving persons with 100\% accuracy and keep a high
                 accuracy of more than 90\% to track human motion
                 directions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lin:2018:OOP,
  author =       "Chi Lin and Yanhong Zhou and Houbing Song and Chang Wu
                 Yu and Guowei Wu",
  title =        "{OPPC}: an Optimal Path Planning Charging Scheme Based
                 on Schedulability Evaluation for {WRSNs}",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126684",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The lack of schedulability evaluation of previous
                 charging schemes in wireless rechargeable sensor
                 networks (WRSNs) degrades the charging efficiency,
                 leading to node exhaustion. We propose an Optimal Path
                 Planning Charging scheme, namely OPPC, for the
                 on-demand charging architecture. OPPC evaluates the
                 schedulability of a charging mission, which makes
                 charging scheduling predictable. It provides an optimal
                 charging path which maximizes charging efficiency. When
                 confronted with a non-schedulable charging mission, a
                 node discarding algorithm is developed to enable the
                 schedulability. Experimental simulations demonstrate
                 that OPPC can achieve better performance in successful
                 charging rate as well as charging efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hu:2018:JOS,
  author =       "Hang Hu and Hang Zhang and Jianxin Guo and Feng Wang",
  title =        "Joint Optimization of Sensing and Power Allocation in
                 Energy-Harvesting Cognitive Radio Networks",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070709",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The energy-harvesting cognitive radio (CR) network is
                 proposed to improve the spectrum efficiency and energy
                 efficiency. We focus on the optimization of sensing
                 time and power allocation to maximize the throughput of
                 the energy-harvesting CR network subject to the energy
                 causality constraint and collision constraint. Based on
                 the classification of operating regions, the
                 optimization problem is divided into two sub-problems.
                 Then, the efficient iterative Algorithm 1 and Algorithm
                 2 are proposed to solve sub-problem (A) and sub-problem
                 (B), respectively. Numerical results show that a
                 significant improvement in the throughput is achieved
                 via joint optimization of sensing time and power
                 allocation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2018:RRA,
  author =       "Die Wu and Li Lu and Muhammad Jawad Hussain and
                 Songfan Li and Mo Li and Fengli Zhang",
  title =        "{$ R^3 $}: Reliable Over-the-Air Reprogramming on
                 Computational {RFIDs}",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070720",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Computational Radio Frequency Identification (CRFID)
                 tags operate solely on harvested energy and have
                 emerged as viable platforms for a variety of ubiquitous
                 sensing and computation applications. Due to their
                 battery-less nature, these tags can be permanently
                 deployed in hard-to-reach places where the possibility
                 of tag access is eliminated. In such scenarios,
                 maintaining and upgrading the tag's firmware becomes
                 infeasible because programming tools, including wired
                 interface and PC-based software, are required to erase,
                 modify, or reprogram the microcontroller unit's memory.
                 Such limitations necessitate the demand for an
                 over-the-air (OTA) scheme, which can wirelessly
                 reprogram or upgrade the firmware in CRFID tags. In
                 this article, we present $ R^3 $ --- a reliable OTA
                 reprogramming scheme that is compliant with EPC
                 protocol and requires no hardware upgrade to RFID
                 reader or CRFID tag. We demonstrate our scheme on three
                 platforms, which include both software-defined as well
                 as chip-based CRFID tags, that is, WISP5.1 and
                 Optimized WISP (Opt-WISP), and Spider tag,
                 respectively. The selection also includes both the
                 FLASH- and FRAM-based microcontrollers. We extensively
                 evaluate our scheme in terms of several metrics,
                 including overall system delay, time and energy
                 overhead, and success rate in line with interrogation
                 range. We foresee our endeavor to offer the viability
                 of OTA reprogramming and firmware upgrade for CRFID
                 tokens under practical situations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2018:NOC,
  author =       "Songyuan Li and Lingkun Fu and Shibo He and Youxian
                 Sun",
  title =        "Near-Optimal Co-Deployment of Chargers and Sink
                 Stations in Rechargeable Sensor Networks",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070721",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Wireless charging technology has drawn great attention
                 of both academia and industry in recent years, due to
                 its potential of significantly improving the system
                 performance of sensor networks. The emergence of an
                 open-source experimental platform for wireless
                 rechargeable sensor networks, Powercast, has made the
                 theoretical research closer to reality. This pioneering
                 platform is able to recharge sensor nodes much more
                 efficiently and allows different communication
                 protocols to be implemented upon users' demands.
                 Different from the RFID-based model widely used in the
                 existing works, Powercast designs the charger and sink
                 station separately. This leads to a new design
                 challenge of cooperatively deploying minimum number of
                 chargers and sink stations in wireless rechargeable
                 sensor networks. Such a co-deployment issue is
                 extremely challenging, since the deployments of
                 chargers and sink stations are coupled, and each
                 subproblem is known to be NP-hard. The key to the
                 design is to understand the intrinsic relationship
                 between data flow and energy flow, which is
                 interdependent. In this article, we tackle this
                 challenge by dividing it into two subproblems and
                 optimizing charger and sink station deployment
                 iteratively. Specifically, we first transform each
                 subproblem to a max-flow problem. With this, we are
                 able to select chargers or sink stations according to
                 their contributions to the total flow rate. We design
                 greedy-based algorithms with a guaranteed worst-case
                 bound $ \ln R / \xi $ for the subproblems of charger
                 deployment and sink station deployment, respectively.
                 Further, we address the original problem by designing
                 an iterative algorithm that solves two subproblems
                 alternatively to achieve a near optimal performance. We
                 corroborate our analysis by extensive simulations under
                 practical coefficient settings and demonstrate the
                 advantage of the proposed algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wagemann:2018:OEN,
  author =       "Peter W{\"a}gemann and Tobias Distler and Heiko Janker
                 and Phillip Raffeck and Volkmar Sieh and Wolfgang
                 Schr{\"o}der-Preikschat",
  title =        "Operating Energy-Neutral Real-Time Systems",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3078631",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy-neutral real-time systems harvest the entire
                 energy they use from their environment. In such
                 systems, energy must be treated as an equally important
                 resource as time, which creates the need to solve a
                 number of problems that so far have not been addressed
                 by traditional real-time systems. In particular, this
                 includes the scheduling of tasks with both time and
                 energy constraints, the monitoring of energy budgets,
                 as well as the survival of blackout periods during
                 which not enough energy is available to keep the system
                 fully operational. In this article, we address these
                 issues presenting EnOS, an operating-system kernel for
                 energy-neutral real-time systems. EnOS considers mixed
                 time criticality levels for different energy
                 criticality modes, which enables a decoupling of time
                 and energy constraints when one is considered less
                 critical than the other. When switching the energy
                 criticality mode, the system also changes the set of
                 executed tasks and is therefore able to dynamically
                 adapt its energy consumption depending on external
                 conditions. By keeping track of the energy budget
                 available, EnOS ensures that in case of a blackout the
                 system state is safely stored to persistent memory,
                 allowing operations to resume at a later point when
                 enough energy is harvested again.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rajib:2018:PRI,
  author =       "MD. Majharul Islam Rajib and Asis Nasipuri",
  title =        "Predictive Retransmissions for Intermittently
                 Connected Sensor Networks with Transmission Diversity",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092947",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Batteryless wireless sensor networks that rely on
                 energy harvested from the environment often exhibit
                 random power outages due to limitations of energy
                 resources, which give rise to intermittent connectivity
                 and long transmission delays. To improve the delay
                 performance in such networks, we consider a design
                 strategy that uses predictive retransmissions to
                 maximize the probability of success for each
                 transmission. This is applied to two different
                 transmission diversity schemes: cooperative relaying
                 over unicast routes and opportunistic routing.
                 Performance evaluations from theoretical models and
                 simulations are presented that show that significant
                 gains can be achieved using the proposed approach in
                 such networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Xu:2018:GEP,
  author =       "Chi Xu and Wei Liang and Haibin Yu",
  title =        "Green-Energy-Powered Cognitive Radio Networks: Joint
                 Time and Power Allocation",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092949",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article studies a green-energy-powered cognitive
                 radio network (GCRN) in an underlay paradigm, wherein
                 multiple battery-free secondary users (SUs) capture
                 both the spectrum and the energy of primary users (PUs)
                 to communicate with an access point (AP). By time
                 division multiple access, each SU transmits data to AP
                 in the allocated time and harvests energy from the RF
                 signals of PUs otherwise, all in the same licensed
                 spectrum concurrently with PUs. Thus, the transmit
                 power of each SU is jointly constrained by the peak
                 interference power at PU and the harvested energy of
                 SU. With the formulated green coexistence paradigm, we
                 investigate the sum-throughput maximization problem
                 with respect to time and power allocation, which is
                 non-convex. To obtain the optimal resource allocation,
                 we propose a joint optimal time and power allocation
                 (JOTPA) algorithm that first transforms the original
                 problem into a convex optimization problem with respect
                 to time and energy allocation, and then solve it by
                 iterative Lagrange dual decomposition. To
                 comprehensively evaluate the performance of the GCRN
                 with JOTPA, we deploy the GCRN in three typical
                 scenarios and compare JOTPA with the equal time and
                 optimal power allocation (ETOPA) algorithm. Extensive
                 simulations show that the deployment of the GCRN
                 significantly influences the throughput performance and
                 JOTPA outperforms ETOPA under all considered
                 scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Eles:2018:GES,
  author =       "Petru Eles and J{\"o}rg Henkel",
  title =        "Guest Editorial for the Special Issue of {ESWEEK
                 2016}",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3152097",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hegde:2018:CAC,
  author =       "Gopalakrishna Hegde and Siddhartha and Nachiket
                 Kapre",
  title =        "{CaffePresso}: Accelerating Convolutional Networks on
                 Embedded {SoCs}",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3105925",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Auto-tuning and parametric implementation of deep
                 learning kernels allow off-the-shelf accelerator-based
                 embedded platforms to deliver high-performance and
                 energy-efficient mappings of the inference phase of
                 lightweight neural networks. Low-complexity classifiers
                 are characterized by operations on small image maps
                 with two to three deep layers and few class labels. For
                 these use cases, we consider a range of embedded
                 systems with 20W power budgets such as the Xilinx ZC706
                 (FPGA), NVIDIA Jetson TX1 (GPU), TI Keystone II (DSP),
                 and Adapteva Parallella (RISC+NoC). In CaffePresso, we
                 combine auto-tuning of the implementation parameters,
                 and platform-specific constraints to deliver optimized
                 solutions for each input ConvNet specification.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tan:2018:LLP,
  author =       "Cheng Tan and Aditi Kulkarni and Vanchinathan
                 Venkataramani and Manupa Karunaratne and Tulika Mitra
                 and Li-Shiuan Peh",
  title =        "{LOCUS}: Low-Power Customizable Many-Core Architecture
                 for Wearables",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3122786",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Application requirements, such as real-time response,
                 are pushing wearable devices to leverage more powerful
                 processors inside the SoC (system on chip). However,
                 existing wearable devices are not well suited for such
                 challenging applications due to poor performance, and
                 the conventional powerful many-core architectures are
                 not appropriate either due to the stringent power
                 budget in this domain. We propose LOCUS --- a low-power,
                 customizable, many-core processor for next-generation
                 wearable devices. LOCUS combines customizable processor
                 cores with a customizable network on a message-passing
                 architecture to deliver very competitive
                 performance/watt --- an average $ 3.1 \times $ compared
                 to quad-core ARM processors used in state-of-the-art
                 wearable devices. A combination of full system
                 simulation with representative applications from the
                 wearable domain and RTL synthesis of the architecture
                 show that 16-core LOCUS achieves an average $ 1.52
                 \times $ performance/watt improvement over a
                 conventional 16-core shared memory many-core
                 architecture. A dynamic power management mechanism is
                 proposed to further decrease the power consumption in
                 both computation and communication, which improves the
                 performance/watt of LOCUS by $ 1.17 \times $.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sutar:2018:DPI,
  author =       "Soubhagya Sutar and Arnab Raha and Devadatta Kulkarni
                 and Rajeev Shorey and Jeffrey Tew and Vijay
                 Raghunathan",
  title =        "{D-PUF}: an Intrinsically Reconfigurable {DRAM PUF}
                 for Device Authentication and Random Number
                 Generation",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3105915",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/prng.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Physically Unclonable Functions (PUFs) have proved to
                 be an effective and low-cost measure against
                 counterfeiting by providing device authentication and
                 secure key storage services. Memory-based PUF
                 implementations are an attractive option due to the
                 ubiquitous nature of memory in electronic devices and
                 the requirement of minimal (or no) additional
                 circuitry. Dynamic Random Access Memory-- (DRAM) based
                 PUFs are particularly advantageous due to their large
                 address space and multiple controllable parameters
                 during response generation. However, prior works on
                 DRAM PUFs use a static response-generation mechanism
                 making them vulnerable to security attacks. Further,
                 they result in slow device authentication, are not
                 applicable to commercial off-the-shelf devices, or
                 require DRAM power cycling prior to authentication. In
                 this article, we propose D-PUF, an intrinsically
                 reconfigurable DRAM PUF based on the idea of DRAM
                 refresh pausing. A key feature of the proposed DRAM PUF
                 is reconfigurability, that is, by varying the DRAM
                 refresh-pause interval, the challenge-response behavior
                 of the PUF can be altered, making it robust to various
                 attacks. The article is broadly divided into two parts.
                 In the first part, we demonstrate the use of D-PUF in
                 performing device authentication through a secure,
                 low-overhead methodology. In the second part, we show
                 the generation of true random numbers using D-PUF. The
                 design is implemented and validated using an Altera
                 Stratix IV GX FPGA-based Terasic TR4-230 development
                 board and several off-the-shelf 1GB DDR3 DRAM modules.
                 Our experimental results demonstrate a $ 4.3 \times
                 $--$ 6.4 \times $ reduction in authentication time
                 compared to prior work. Using controlled temperature
                 and accelerated aging tests, we also demonstrate the
                 robustness of our authentication mechanism to
                 temperature variations and aging effects. Finally, the
                 ability of the design to generate random numbers is
                 verified using the NIST Statistical Test Suite.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Guo:2018:IWP,
  author =       "Jie Guo and Chuhan Min and Tao Cai and Yiran Chen",
  title =        "Improving Write Performance and Extending Endurance of
                 Object-Based {NAND} Flash Devices",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3105924",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Write amplification is a major cause of performance
                 and endurance degradations in NAND flash-based storage
                 systems. In an object-based NAND flash device (ONFD),
                 two causes of write amplification are onode partial
                 update and cascading update. Here, onode is a type of
                 small-sized object metadata, and multiple onodes are
                 stored in one NAND flash page. Updating one onode
                 invokes partial page update (i.e., onode partial
                 update), incurring unnecessary migration of the
                 un-updated data. Cascading update denotes updating
                 object metadata in a cascading manner due to object
                 data update or migration. Although there are only
                 several bytes that need to be updated in the object
                 metadata, one or more pages have to be re-written
                 accordingly. In this work, we propose a system design
                 to alleviate the write amplification issue in the
                 object-based NAND flash device. The proposed design
                 includes (1) a multi-level garbage collection technique
                 to minimize unnecessary data migration incurred by
                 onode partial update and (2) a B+ table tree,
                 Semantics-Aware Flexible (SAF) data layout, and
                 selective cache design to reduce the write operations
                 associated with cascading update. To guarantee system
                 consistency, we also propose a power failure handling
                 technique. Experiment results show that our proposed
                 design can achieve up to 20\% write reduction compared
                 to the best states of the art.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Maier:2018:FIT,
  author =       "Petra R. Maier and Veit B. Kleeberger and Daniel
                 Mueller-Gritschneder and Ulf Schlichtmann",
  title =        "Fault Injection for Test-Driven Development of Robust
                 {SoC} Firmware",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "19:1--19:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092943",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Robustness against errors in hardware must be
                 considered from the very beginning of safety-critical
                 system-on-chip firmware design. Therefore, we present
                 fault injection for test-driven development (TDD) of
                 robust firmware. As TDD is based on instant feedback to
                 the designer, fault injection must execute within few
                 minutes. In contrast to state-of-the-art approaches, we
                 avoid long simulation scenarios and runtimes by
                 injecting faults at the unit level and utilizing
                 host-compiled simulation. Further, three static
                 bit-level analyses of firmware source code and hardware
                 specification reduce the fault set significantly. This
                 accelerates fault injection by several orders of
                 magnitude and enables robustness-aware TDD.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Balkan:2018:UFA,
  author =       "Ayca Balkan and Paulo Tabuada and Jyotirmoy V.
                 Deshmukh and Xiaoqing Jin and James Kapinski",
  title =        "{Underminer}: a Framework for Automatically
                 Identifying Nonconverging Behaviors in Black-Box System
                 Models",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "20:1--20:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3122787",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Evaluation of industrial embedded control system
                 designs is a time-consuming and imperfect process.
                 While an ideal process would apply a formal
                 verification technique such as model checking or
                 theorem proving, these techniques do not scale to
                 industrial design problems, and it is often difficult
                 to use these techniques to verify performance aspects
                 of control system designs, such as stability or
                 convergence. For industrial designs, engineers rely on
                 testing processes to identify critical or unexpected
                 behaviors. We propose a novel framework called
                 Underminer to improve the testing process; this is an
                 automated technique to identify nonconverging behaviors
                 in embedded control system designs. Underminer treats
                 the system as a black box and lets the designer
                 indicate the model parameters, inputs, and outputs that
                 are of interest. It differentiates convergent from
                 nonconvergent behaviors using Convergence Classifier
                 Functions (CCFs). The tool can be applied in the
                 context of testing models created late in the
                 controller development stage, where it assumes that the
                 given model displays mostly convergent behavior and
                 learns a CCF in an unsupervised fashion from such
                 convergent model behaviors. This CCF is then used to
                 guide a thorough exploration of the model with the help
                 of optimization-guided techniques or adaptive sampling
                 techniques, with the goal of identifying rare
                 nonconvergent model behaviors. Underminer can also be
                 used early in the development stage, where models may
                 have some significant nonconvergent behaviors. Here,
                 the framework permits designers to indicate their
                 mental model for convergence by labeling behaviors as
                 convergent/nonconvergent and then constructs a CCF
                 using a supervised learning technique. In this use
                 case, the goal is to use the CCF to test an improved
                 design for the model. Underminer supports a number of
                 convergence-like notions, such as those based on
                 Lyapunov analysis and temporal logic, and also CCFs
                 learned directly from labeled output behaviors using
                 machine-learning techniques such as support vector
                 machines and neural networks. We demonstrate the
                 efficacy of Underminer by evaluating its performance on
                 several academic as well as industrial examples.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Fan:2018:SDR,
  author =       "Chuchu Fan and James Kapinski and Xiaoqing Jin and
                 Sayan Mitra",
  title =        "Simulation-Driven Reachability Using Matrix Measures",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "21:1--21:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126685",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Simulation-driven verification can provide formal
                 safety guarantees for otherwise intractable nonlinear
                 and hybrid system models. A key step in
                 simulation-driven algorithms is to compute the reach
                 set overapproximations from a set of initial states
                 through numerical simulations and sensitivity analysis.
                 This article addresses this problem by providing
                 algorithms for computing discrepancy functions as the
                 upper bound on the sensitivity, that is, the rate at
                 which trajectories starting from neighboring states
                 converge or diverge. The algorithms rely on computing
                 local bounds on matrix measures as the exponential
                 change rate of the discrepancy function. We present two
                 techniques to compute the matrix measures under
                 different norms: regular Euclidean norm or Euclidean
                 norm under coordinate transformation, such that the
                 exponential rate of the discrepancy function, and
                 therefore, the conservativeness of the
                 overapproximation, is locally minimized. The proposed
                 algorithms enable automatic reach set computations of
                 general nonlinear systems and have been successfully
                 used on several challenging benchmark models. All
                 proposed algorithms for computing discrepancy functions
                 give soundness and relative completeness of the overall
                 simulation-driven safety-bounded verification
                 algorithm. We present a series of experiments to
                 illustrate the accuracy and performance of the
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2018:PSC,
  author =       "Hyoseung Kim and Ragunathan (Raj) Rajkumar",
  title =        "Predictable Shared Cache Management for Multi-Core
                 Real-Time Virtualization",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "22:1--22:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092946",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "Real-time virtualization has gained much attention for
                 the consolidation of multiple real-time systems onto a
                 single hardware platform while ensuring timing
                 predictability. However, a shared last-level cache
                 (LLC) on modern multi-core platforms can easily hamper
                 the timing predictability of real-time virtualization
                 due to the resulting temporal interference among
                 consolidated workloads. Since such interference caused
                 by the LLC is highly variable and may have not even
                 existed in legacy systems to be consolidated, it poses
                 a significant challenge for real-time virtualization.
                 In this article, we propose a predictable shared cache
                 management framework for multi-core real-time
                 virtualization. Our framework introduces two
                 hypervisor-level techniques, vLLC and vColoring, that
                 enable the cache allocation of individual tasks running
                 in a virtual machine (VM), which is not achievable by
                 the current state of the art. Our framework also
                 provides a cache management scheme that determines
                 cache allocation to tasks, designs VMs in a cache-aware
                 manner, and minimizes the aggregated utilization of VMs
                 to be consolidated. As a proof of concept, we
                 implemented vLLC and vColoring in the KVM hypervisor
                 running on x86 and ARM multi-core platforms.
                 Experimental results with three different guest OSs
                 (i.e., Linux/RK, vanilla Linux, and MS Windows
                 Embedded) show that our techniques can effectively
                 control the cache allocation of tasks in VMs. Our cache
                 management scheme yields a significant utilization
                 benefit compared to other approaches while satisfying
                 timing constraints.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kahkonen:2018:TPC,
  author =       "Kari K{\"a}hk{\"o}nen and Keijo Heljanko",
  title =        "Testing Programs with Contextual Unfoldings",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "23:1--23:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2810000",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present a new algorithm that
                 combines contextual unfoldings and dynamic symbolic
                 execution to systematically test multithreaded
                 programs. The approach uses symbolic execution to limit
                 the number of input values and unfoldings to thus limit
                 the number of thread interleavings that are needed to
                 cover reachable local states of threads in the program
                 under test. We show that the use of contextual
                 unfoldings allows interleavings of threads to be
                 succinctly represented. This can in some cases lead to
                 a substantial reduction in the number of needed test
                 executions when compared to previous approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gu:2018:EST,
  author =       "Xiaozhe Gu and Arvind Easwaran",
  title =        "Efficient Schedulability Test for Dynamic-Priority
                 Scheduling of Mixed-Criticality Real-Time Systems",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "24:1--24:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3105922",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Systems in many safety-critical application domains
                 are subject to certification requirements. In such a
                 system, there are typically different applications
                 providing functionalities that have varying degrees of
                 criticality. Consequently, the certification
                 requirements for functionalities at these different
                 criticality levels are also varying, with very high
                 levels of assurance required for a highly critical
                 functionality, whereas relatively low levels of
                 assurance are required for a less critical
                 functionality. Considering the timing assurance given
                 to various applications in the form of guaranteed
                 budgets within deadlines, a theory of real-time
                 scheduling for such multi-criticality systems has been
                 recently under development. In particular, an algorithm
                 called Earliest Deadline First with Virtual Deadlines
                 (EDF-VD) has shown a lot of promise for systems with
                 two criticality levels, especially in terms of
                 practical performance demonstrated through experiment
                 results. In this article, we design a new
                 schedulability test for EDF-VD that extends these
                 performance benefits to multi-criticality systems. We
                 propose a new test based on demand bound functions and
                 also present a novel virtual deadline assignment
                 strategy. Through extensive experiments, we show that
                 the proposed technique significantly outperforms
                 existing strategies for a variety of generic real-time
                 systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kulkarni:2018:LOC,
  author =       "Amey Kulkarni and Colin Shea and Tahmid Abtahi and
                 Houman Homayoun and Tinoosh Mohsenin",
  title =        "Low Overhead {CS}-Based Heterogeneous Framework for
                 Big Data Acceleration",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "25:1--25:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092944",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Big data processing on hardware gained immense
                 interest among the hardware research community to take
                 advantage of fast processing and reconfigurability.
                 Though the computation latency can be reduced using
                 hardware, big data processing cost is dominated by data
                 transfers. In this article, we propose a low overhead
                 framework based on compressive sensing (CS) to reduce
                 data transfers up to 67\% without affecting signal
                 quality. CS has two important kernels: ``sensing'' and
                 ``reconstruction.'' In this article, we focus on CS
                 reconstruction using the orthogonal matching pursuit
                 (OMP) algorithm. We implement the OMP CS reconstruction
                 algorithm on a domain-specific PENC many-core platform
                 and a low-power Jetson TK1 platform consisting of an
                 ARM CPU and a K1 GPU. Detailed performance analysis of
                 OMP algorithm on each platform suggests that the PENC
                 many-core platform has $ 15 \times $ and $ 18 \times $
                 less energy consumption and $ 16 \times $ and $ 8
                 \times $ faster reconstruction time as compared to the
                 low-power ARM CPU and K1 GPU, respectively.
                 Furthermore, we implement the proposed CS-based
                 framework on heterogeneous architecture, in which the
                 PENC many-core architecture is used as an
                 ``accelerator'' and processing is performed on the ARM
                 CPU platform. For demonstration, we integrate the
                 proposed CS-based framework with a Hadoop MapReduce
                 platform for a face detection application. The results
                 show that the proposed CS-based framework with the PENC
                 many-core as an accelerator achieves a 26.15\% data
                 storage/transfer reduction, with an execution time and
                 energy consumption overhead of 3.7\% and 0.002\%,
                 respectively, for 5,000 image transfers. Compared to
                 the CS-based framework implementation on the low-power
                 Jetson TK1 ARM CPU+GPU platform, the PENC many-core
                 implementation is $ 2.3 \times $ faster for the image
                 reconstruction part, while achieving 29\% higher
                 performance and 34\% better energy efficiency for the
                 complete face detection application on the Hadoop
                 MapReduce platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nodeh:2018:EAM,
  author =       "Mohammad Taghi Teimoori Nodeh and Mostafa Bazzaz and
                 Alireza Ejlali",
  title =        "Exploiting Approximate {MLC-PCM} in Low-Power Embedded
                 Systems",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "26:1--26:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3105926",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multi-level cell phase change memory (MLC-PCM),
                 because of its very low leakage power and high density,
                 is promising for embedded systems. Furthermore, for
                 applications with inherent low sensitivity to errors,
                 approximate write operations can be exploited in
                 MLC-PCM to improve endurance and performance. However,
                 data that reside in the approximate MLC-PCM for a
                 rather long time without refreshing are prone to soft
                 errors due to resistance drift phenomenon, while even
                 for an application with inherent low sensitivity to
                 errors, a high soft error rate can degrade its Quality
                 of Result (QoR). The architecture-level approaches to
                 decrease the drift effect incur considerable power
                 overhead (about 100\%), which is a prominent issue in
                 embedded systems, and are dependent on the number of
                 logic levels stored in the PCM cell (e.g., most of them
                 are designed for 4LC-PCM). This article, taking a
                 different approach, proposes a drift-aware frequency
                 and voltage management to alleviate the drift-based
                 soft-error rate. To this end, first we characterize the
                 application data based on the degree of being exposed
                 to the drift to identify the drift-prone application
                 data. Then we assign the execution frequency and
                 voltage to different regions of the application
                 considering the drift. This frequency assignment speeds
                 up the application regions wherein the drift-prone data
                 are accessed to shorten the lifetime of the drift-prone
                 data, thereby decreasing the soft error rate. An
                 integer linear programming model implements our
                 proposed Dynamic Voltage Frequency Scaling (DVFS).
                 Also, the proposed approach is independent of the
                 number of levels of PCM cells and can be applied to any
                 MLC-PCM system. To evaluate the approach, the
                 approximate MLC-PCM is simulated using empirical models
                 and is integrated into a full-system simulator as data
                 memory. The experimental results show that, by
                 exploiting the approach, QoR is in the acceptable
                 range, while its power overhead is about 84\% (on
                 average) less than that of the architecture-level
                 approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gaglio:2018:DPD,
  author =       "Salvatore Gaglio and Giuseppe {Lo Re} and Gloria
                 Martorella and Daniele Peri",
  title =        "{DC4CD}: a Platform for Distributed Computing on
                 Constrained Devices",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "27:1--27:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3105923",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present Distributed Computing for
                 Constrained Devices (DC4CD), a novel software
                 architecture that supports symbolic distributed
                 computing on wireless sensor networks. DC4CD integrates
                 the functionalities of a high-level symbolic
                 interpreter, a compiler, and an operating system, and
                 includes networking abstractions to exchange high-level
                 symbolic code among peer devices. Contrarily to other
                 architectures proposed in the literature, DC4CD allows
                 for changes at runtime, even on deployed nodes of both
                 application and system code. Experimental results show
                 that DC4CD is more efficient in terms of memory usage
                 than existing architectures, with which it also
                 compares well in terms of execution efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Namazi:2018:MBR,
  author =       "Alireza Namazi and Meisam Abdollahi and Saeed Safari
                 and Siamak Mohammadi",
  title =        "A Majority-Based Reliability-Aware Task Mapping in
                 High-Performance Homogeneous {NoC} Architectures",
  journal =      j-TECS,
  volume =       "17",
  number =       "1",
  pages =        "28:1--28:??",
  month =        jan,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3131273",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a new reliability-aware task
                 mapping approach in a many-core platform at design time
                 for applications with DAG-based task graphs. The main
                 goal is to devise a task mapping which meets a
                 predefined reliability threshold considering a
                 minimized performance degradation. The proposed
                 approach uses a majority-voting replication technique
                 to fulfill error-masking capability. A quantitative
                 reliability model is also proposed for the platform.
                 Our platform is a homogeneous many-core architecture
                 with mesh-based interconnection using traditional
                 deterministic XY routing algorithm. Our iterative
                 approach is applicable to an unlimited number of system
                 fault types. All parts of the platform, including
                 cores, links, and routers, are assumed to be prone to
                 failures. We used the MNLP optimization technique to
                 find the optimal mapping of the presented task graph.
                 Experimental results show that our suggested task
                 mappings not only comply with predefined reliability
                 thresholds but also achieve notable time complexity
                 reduction with respect to exhaustive space
                 exploration.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2018:EIC,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: {Industry 4.0} --- a Confluence of Embedded
                 Artificial Intelligence, Machine Learning, Robotics and
                 Security",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "29:1--29:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3194944",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Leonard:2018:GES,
  author =       "Elizabeth Leonard",
  title =        "Guest Editorial: Special Issue on Formal Methods and
                 Models for System Design",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "30:1--30:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3162079",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tanase:2018:SML,
  author =       "Alexandru Tanase and Michael Witterauf and J{\"u}rgen
                 Teich and Frank Hannig",
  title =        "Symbolic Multi-Level Loop Mapping of Loop Programs for
                 Massively Parallel Processor Arrays",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092952",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Today's MPSoCs (multiprocessor systems-on-chip) have
                 brought up massively parallel processor array
                 accelerators that may achieve a high computational
                 efficiency by exploiting multiple levels of parallelism
                 and different memory hierarchies. Such parallel
                 processor arrays are perfect targets, particularly for
                 the acceleration of nested loop programs due to their
                 regular and massively parallel nature. However,
                 existing loop parallelization techniques are often
                 unable to exploit multiple levels of parallelism and
                 are either I/O or memory bounded. Furthermore, if the
                 number of available processing elements becomes only
                 known at runtime---as in adaptive systems---static
                 approaches fail. In this article, we solve some of
                 these problems by proposing a hybrid compile/runtime
                 multi-level symbolic parallelization technique that is
                 able to: (a) exploit multiple levels of parallelism as
                 well as (b) different memory hierarchies, and (c) to
                 match the I/O or memory capabilities of the target
                 architecture for scenarios where the number of
                 available processing elements is only known at runtime.
                 Our proposed technique consists of two compile-time
                 transformations: (a) symbolic hierarchical tiling
                 followed by (b) symbolic multi-level scheduling. The
                 tiling levels scheduled in parallel exploit different
                 levels of parallelism, whereas the sequential one,
                 different memory hierarchies. Furthermore, by tuning
                 the size of the tiles on the individual levels, a
                 tradeoff between the necessary I/O-bandwidth and memory
                 is possible, which facilitates obeying resource
                 constraints. The resulting schedules are symbolic with
                 respect to the problem size and tile sizes. Thus, the
                 number of processing elements to map onto does not need
                 to be known at compile time. At runtime, when the
                 number of available processors becomes known, a simple
                 prologue chooses a feasible schedule with respect to
                 I/O and memory constraints that is latency-optimal for
                 the chosen tile size. In summary, our approach
                 determines the set of feasible, latency-optimal
                 symbolic loop schedule candidates at compile time, from
                 which one is dynamically selected at runtime. This
                 approach exploits multiple levels of parallelism, is
                 independent of the problem size of the loop nest, and
                 thereby avoids any expensive re-compilation at runtime.
                 This is particularly important for low cost and
                 memory-scarce embedded MPSoC platforms that may not
                 afford to host a just-in-time compiler.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Attie:2018:MPR,
  author =       "Paul C. Attie and Kinan Dak {Al Bab} and Mouhammad
                 Sakr",
  title =        "Model and Program Repair via {SAT} Solving",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "32:1--32:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3147426",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We consider the subtractive model repair problem:
                 given a finite Kripke structure M and a CTL formula $
                 \eta $, determine if M contains a substructure $ M' $
                 that satisfies $ \eta $. Thus, M can be ``repaired'' to
                 satisfy $ \eta $ by deleting some transitions and states. We
                 map an instance $ \langle M, \eta \rangle $ of model
                 repair to a Boolean formula repair $ (M, \eta)$ such
                 that $ \langle M, \eta \rangle $ has a solution iff
                 repair $ (M, \eta)$ is satisfiable. Furthermore, a
                 satisfying assignment determines which states and
                 transitions must be removed from $M$ to yield a model $
                 M' $ of $ \eta $. Thus, we can use any SAT solver to
                 repair Kripke structures. Using a complete SAT solver
                 yields a complete algorithm: it always finds a repair
                 if one exists. We also show that CTL model repair is
                 NP-complete. We extend the basic repair method in three
                 directions: (1) the use of abstraction mappings, that
                 is, repair a structure abstracted from M and then
                 concretize the resulting repair to obtain a repair of
                 M, (2) repair concurrent Kripke structures and
                 concurrent programs: we use the pairwise method of
                 Attie and Emerson to represent and repair the behavior
                 of a concurrent program, as a set of ``concurrent
                 Kripke structures'', with only a quadratic increase in
                 the size of the repair formula, and (3) repair
                 hierarchical Kripke structures: we use a CTL formula to
                 summarize the behavior of each ``box,'' and CTL
                 deduction to relate the box formula with the overall
                 specification.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Smyth:2018:SSC,
  author =       "Steven Smyth and Christian Motika and Karsten Rathlev
                 and Reinhard von Hanxleden and Michael Mendler",
  title =        "{SCEst}: Sequentially Constructive {Esterel}",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3063129",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The synchronous language Esterel provides determinate
                 concurrency for reactive systems. Determinacy is
                 ensured by the signal coherence rule, which demands
                 that signals have a stable value throughout one
                 reaction cycle. This is natural for the original
                 application domains of Esterel, such as controller
                 design and hardware development; however, it is
                 unnecessarily restrictive for software development.
                 Sequentially Constructive Esterel (SCEst) overcomes
                 this restriction by allowing values to change
                 instantaneously, as long as determinacy is still
                 guaranteed, adopting the recently proposed Sequentially
                 Constructive model of computation. SCEst is grounded in
                 the minimal Sequentially Constructive Language (SCL),
                 which also provides a novel semantic definition and
                 compilation approach for Esterel.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dokhanchi:2018:FRD,
  author          = {Adel Dokhanchi and Bardh Hoxha and Georgios Fainekos},
  title           = {Formal Requirement Debugging for Testing and
                     Verification of Cyber-Physical Systems},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {34:1--34:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3147451},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {A framework for the elicitation and debugging of
                     formal specifications for Cyber-Physical Systems is
                     presented. The elicitation of specifications is handled
                     through a graphical interface. Two debugging algorithms
                     are presented. The first checks for erroneous or
                     incomplete temporal logic specifications without
                     considering the system. The second can be utilized for
                     the analysis of reactive requirements with respect to
                     system test traces. The specification debugging
                     framework is applied on a number of formal
                     specifications collected through a user study. The user
                     study establishes that requirement errors are common
                     and that the debugging framework can resolve many
                     insidious specification errors.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {34},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Li:2018:FPS,
  author          = {Zheng Li and Shuibing He},
  title           = {Fixed-Priority Scheduling for Two-Phase
                     Mixed-Criticality Systems},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {35:1--35:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3105921},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, a two-phase execution model is
                     proposed for mixed-criticality (MC) tasks. Different
                     from traditional MC tasks with a computation phase
                     only, the two-phase execution model requires a
                     memory-access phase first to fetch the instructions and
                     data, and then computation. Theoretical foundations are
                     first established for a schedulability test under given
                     memory-access and computation priority assignment.
                     Based on the established theoretical conclusions, a
                     two-stage priority assignment algorithm, which can find
                     the best priority assignment for both memory-access and
                     computation phases under fixed-priority scheduling, is
                     further developed. Extensive experiments have been
                     conducted and the experimental results validate the
                     effectiveness of our proposed approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {35},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Liang:2018:EVL,
  author          = {Lihao Liang and Tom Melham and Daniel Kroening and
                     Peter Schrammel and Michael Tautschnig},
  title           = {Effective Verification for Low-Level Software with
                     Competing Interrupts},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {36:1--36:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3147432},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Interrupt-driven software is difficult to test and
                     debug, especially when interrupts can be nested and
                     subject to priorities. Interrupts can arrive at
                     arbitrary times, leading to an exponential blow-up in
                     the number of cases to consider. We present a new
                     formal approach to verifying interrupt-driven software
                     based on symbolic execution. The approach leverages
                     recent advances in the encoding of the execution traces
                     of interacting, concurrent threads. We assess the
                     performance of our method on benchmarks drawn from
                     embedded systems code and device drivers, and
                     experimentally compare it to conventional approaches
                     that use source-to-source transformations. Our results
                     show that our method significantly outperforms these
                     techniques. To the best of our knowledge, our work is
                     the first to demonstrate effective verification of
                     low-level embedded software with nested interrupts.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {36},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Xie:2018:ESA,
  author          = {Xinfeng Xie and Dayou Du and Qian Li and Yun Liang and
                     Wai Teng Tang and Zhong Liang Ong and Mian Lu and Huynh
                     Phung Huynh and Rick Siow Mong Goh},
  title           = {Exploiting Sparsity to Accelerate Fully Connected
                     Layers of {CNN}-Based Applications on Mobile {SoCs}},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {37:1--37:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3122788},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Convolutional neural networks (CNNs) are widely
                     employed in many image recognition applications. With
                     the proliferation of embedded and mobile devices, such
                     applications are becoming commonplace on mobile
                     devices. Network pruning is a commonly used strategy to
                     reduce the memory and storage footprints of CNNs on
                     mobile devices. In this article, we propose customized
                     versions of the sparse matrix multiplication algorithm
                     to speed up inference on mobile devices and make it
                     more energy efficient. Specifically, we propose a Block
                     Compressed Sparse Column algorithm and a
                     bit-representation-based algorithm (BitsGEMM) that
                     exploit sparsity to accelerate the fully connected
                     layers of a network on the NVIDIA Jetson TK1 platform.
                     We evaluate the proposed algorithms using real-world
                     object classification and object detection
                     applications. Experiments show that performance
                     speedups can be achieved over the original baseline
                     implementation using cuBLAS. On object detection CNNs,
                     an average speedup of $ 1.82 \times $ is obtained over
                     baseline cuBLAS in the fully connected layer of the VGG
                     model, whereas on classification CNNs, an average
                     speedup of $ 1.51 \times $ is achieved for the fully
                     connected layer of the pruned-VGG model. Energy
                     consumption reduction of 43--46\% is also observed due
                     to decreased computational and memory bandwidth
                     demands.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {37},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Lu:2018:TSI,
  author          = {Sixing Lu and Roman Lysecky},
  title           = {Time and Sequence Integrated Runtime Anomaly Detection
                     for Embedded Systems},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {38:1--38:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3122785},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Network-connected embedded systems grow on a large
                     scale as a critical part of Internet of Things, and
                     these systems are under the risk of increasing malware.
                     Anomaly-based detection methods can detect malware in
                     embedded systems effectively and provide the advantage
                     of detecting zero-day exploits relative to
                     signature-based detection methods, but existing
                     approaches incur significant performance overheads and
                     are susceptible to mimicry attacks. In this article, we
                     present a formal runtime security model that defines
                     the normal system behavior including execution sequence
                     and execution timing. The anomaly detection method in
                     this article utilizes on-chip hardware to
                     non-intrusively monitor system execution through trace
                     port of the processor and detect malicious activity at
                     runtime. We further analyze the properties of the
                     timing distribution for control flow events, and select
                     subset of monitoring targets by three selection metrics
                     to meet hardware constraint. The designed detection
                     method is evaluated by a network-connected pacemaker
                     benchmark prototyped in FPGA and simulated in SystemC,
                     with several mimicry attacks implemented at different
                     levels. The resulting detection rate and false positive
                     rate considering constraints on the number of monitored
                     events supported in the on-chip hardware demonstrate
                     good performance of our approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {38},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Ballabriga:2018:SWC,
  author          = {Cl{\'e}ment Ballabriga and Julien Forget and Giuseppe
                     Lipari},
  title           = {Symbolic {WCET} Computation},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {39:1--39:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3147413},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Parametric Worst-case execution time (WCET) analysis
                     of a sequential program produces a formula that
                     represents the worst-case execution time of the
                     program, where parameters of the formula are
                     user-defined parameters of the program (as loop bounds,
                     values of inputs, or internal variables, etc). In this
                     article we propose a novel methodology to compute the
                     parametric WCET of a program. Unlike other algorithms
                     in the literature, our method is not based on Integer
                     Linear Programming (ILP). Instead, we follow an
                     approach based on the notion of symbolic computation of
                     WCET formulae. After explaining our methodology and
                     proving its correctness, we present a set of
                     experiments to compare our method against the state of
                     the art. We show that our approach dominates other
                     parametric analyses and produces results that are very
                     close to those produced by non-parametric ILP-based
                     approaches, while keeping very good computing time.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {39},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Dutt:2018:ADA,
  author =       "Sunil Dutt and Sukumar Nandi and Gaurav Trivedi",
  title =        "Analysis and Design of Adders for Approximate
                 Computing",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "40:1--40:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3131274",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The concept of approximate computing, that is, to
                 sacrifice computation quality for computation efforts,
                 has recently emerged as a promising design approach.
                 Over the past decade, several research works have
                 explored approximate computing at both the software
                 level and hardware level of abstraction with
                 encouraging results. At the hardware level of
                 abstraction, adders (being the fundamental and most
                 widely used data operators in digital systems) have
                 attracted a significant attention for approximation. In
                 this article, we first explain briefly the
                 need/significance of approximate adders. We then
                 propose four Approximate Full Adders (AFAs) for
                 high-performance energy-efficient approximate
                 computing. The key design objective behind the proposed
                 AFAs is to curtail the length of carry propagation
                 subjected to minimal error rate. Next, we exploit one
                 of the proposed AFAs (optimal one) to construct an
                 N-bit approximate adder that hereinafter is referred as
                 ``ApproxADD.'' An emergent property of ApproxADD is
                 that carries do not propagate in it, and, consequently,
                 it provides bit-width-aware constant delay (O(1)).
                 ApproxADD also provides improvement in dynamic power
                 consumption by 46.31\% and in area by 28.57\% w.r.t.
                 Ripple Carry Adder (RCA), which exhibits the lowest
                 power and area. Although ApproxADD provides a
                 significant improvement in delay, power, and area, it
                 may not be preferred for some of the error-resilient
                 applications because its: (i) Error Distance (ED) is
                 too high; and (ii) Error Rate (ER) increases rapidly
                 with bit-width ($N$). To improve ED and ER, we exploit
                 the concept of carry-lifetime and Error Detection and
                 Correction logic, respectively. In this way, we
                 introduce two more (improved) versions of
                 ApproxADD---ApproxADD $ \upsilon $ 1 and ApproxADD $
                 \upsilon $ 2. We call these as ApproxADD $ \upsilon $ 1
                 and ApproxADD $ \upsilon $ 2 with existing approximate
                 adders based on conventional design metrics and
                 approximate computing design metrics. Furthermore, to
                 inspect effectiveness of the proposed approach in
                 real-life applications, we demonstrate image
                 compression and decompression by replacing the
                 conventional addition operations in Discrete Cosine
                 Transform (DCT) and Inverse Discrete Cosine Transform
                 (IDCT) modules with ApproxADD $ \upsilon $ 2.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Leech:2018:RPP,
  author          = {Charles Leech and Charan Kumar and Amit Acharyya and
                     Sheng Yang and Geoff V. Merrett and Bashir M.
                     Al-Hashimi},
  title           = {Runtime Performance and Power Optimization of Parallel
                     Disparity Estimation on Many-Core Platforms},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {41:1--41:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3133560},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article investigates the use of many-core systems
                     to execute the disparity estimation algorithm, used in
                     stereo vision applications, as these systems can
                     provide flexibility between performance scaling and
                     power consumption. We present a learning-based runtime
                     management approach that achieves a required
                     performance threshold while minimizing power
                     consumption through dynamic control of frequency and
                     core allocation. Experimental results are obtained from
                     a 61-core Intel Xeon Phi platform for the
                     aforementioned investigation. The same performance can
                     be achieved with an average reduction in power
                     consumption of 27.8\% and increased energy efficiency
                     by 30.04\% when compared to Dynamic Voltage and
                     Frequency Scaling control alone without runtime
                     management.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {41},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Lee:2018:FRT,
  author          = {Ganghee Lee and Ediz Cetin and Oliver Diessel},
  title           = {Fault Recovery Time Analysis for Coarse-Grained
                     Reconfigurable Architectures},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {42:1--42:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3140944},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Coarse-grained reconfigurable architectures (CGRAs)
                     have drawn increasing attention due to their
                     performance and flexibility advantages. Typically,
                     CGRAs incorporate many processing elements in the form
                     of an array, which is suitable for implementing spatial
                     redundancy, as used in the design of fault-tolerant
                     systems. This article introduces a recovery time model
                     for transient faults in CGRAs. The proposed
                     fault-tolerant CGRAs are based on triple modular
                     redundancy and coding techniques for error detection
                     and correction. To evaluate the model, several kernels
                     from space computing are mapped onto the suggested
                     architecture. We demonstrate the tradeoff between
                     recovery time, performance, and area. In addition, the
                     average execution time of an application including
                     recovery time is evaluated using area-based error-rate
                     estimates in harsh radiation environments. The results
                     show that task partitioning is important for bounding
                     the recovery time of applications that have long
                     execution times. It is also shown that error-correcting
                     code (ECC) is of limited practical value for tasks with
                     long execution times in high radiation environments, or
                     when the degree of task partitioning is high.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {42},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Harrison:2018:CPR,
  author          = {David C. Harrison and Winston K. G. Seah and Ramesh
                     Rayudu},
  title           = {Coverage Preservation with Rapid Forwarding in
                     Energy-Harvesting Wireless Sensor Networks for Critical
                     Rare Events},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {43:1--43:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3140961},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Wireless sensor networks for rarely occurring critical
                     events must maintain sensing coverage and low-latency
                     network connectivity to ensure event detection and
                     subsequent rapid propagation of notification messages.
                     Few algorithms have been proposed that address both
                     coverage and forwarding and those that do are either
                     unconcerned with rapid propagation or are not optimised
                     to handle the constant changes in topology observed in
                     duty-cycled networks. This article proposes an
                     algorithm for Coverage Preservation with Rapid
                     Forwarding (CPRF). The algorithm is shown to deliver
                     perfect coverage maintenance and low-latency guaranteed
                     message propagation whilst allowing stored-charge
                     conservation via collaborative duty cycling in
                     energy-harvesting networks. Favourable comparisons are
                     made against established and recently proposed
                     algorithms in both sparse planned and dense random
                     distributions. Further, an implementation for
                     commercially available wireless sensing devices is
                     evaluated for detection and notification of damage to
                     highway light poles caused by vortex shedding.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {43},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Li:2018:ECB,
  author          = {He Li and Kaoru Ota and Mianxiong Dong},
  title           = {Energy Cooperation in Battery-Free Wireless
                     Communications with Radio Frequency Energy Harvesting},
  journal         = j-TECS,
  volume          = {17},
  number          = {2},
  pages           = {44:1--44:??},
  month           = apr,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3141249},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:34 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Radio frequency (RF) energy harvesting techniques are
                     becoming a potential method to power battery-free
                     wireless networks. In RF energy harvesting
                     communications, energy cooperation enables shaping and
                     optimization of the energy arrivals at the
                     energy-receiving node to improve the overall system
                     performance. In this article, we propose an energy
                     cooperation scheme that enables energy cooperation in
                     battery-free wireless networks with RF harvesting. We
                     first study the battery-free wireless network with RF
                     energy harvesting and then state the problem that
                     optimizing the system performance with limited
                     harvesting energy through new energy cooperation
                     protocol. Finally, from the extensive simulation
                     results, our energy cooperation protocol performs
                     better than the original battery-free wireless network
                     solution.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {44},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs},
}

@Article{Park:2018:SCG,
  author =       "Jurn-Gyu Park and Chen-Ying Hsieh and Nikil Dutt and
                 Sung-Soo Lim",
  title =        "Synergistic {CPU--GPU} Frequency Capping for
                 Energy-Efficient Mobile Games",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "45:1--45:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3145337",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Mobile platforms are increasingly using Heterogeneous
                 Multiprocessor Systems-on-Chip (HMPSoCs) with
                 differentiated processing cores and GPUs to achieve
                 high performance for graphics-intensive applications
                 such as mobile games. Traditionally, separate CPU and
                 GPU governors are deployed in order to achieve energy
                 efficiency through Dynamic Voltage Frequency Scaling
                 (DVFS) but miss opportunities for further energy
                 savings through coordinated system-level application of
                 DVFS. We present a cooperative CPU-GPU DVFS strategy
                 (called Co-Cap) that orchestrates energy-efficient CPU
                 and GPU DVFS through synergistic CPU and GPU frequency
                 capping to avoid frequency overprovisioning while
                 maintaining desired performance. Unlike traditional
                 approaches that target a narrow set of mobile games,
                 our Co-Cap approach is applicable across a wide range
                 of microbenchmarks and mobile games. Our methodology
                 employs a systematic training phase using fine-grained
                 refinement steps with evaluations of frequency capping
                 tables followed by a deployment phase, allowing
                 deployment across a wide range of microbenchmarks and
                 mobile games with varying graphics workloads. Our
                 experimental results across multiple sets of over 200
                 microbenchmarks and 40 mobile games show that Co-Cap
                 improves energy per frame by on average 8.9\% (up to
                 18.3\%) and 7.8\% (up to 27.6\%) (16.6\% and 15.7\% in
                 CPU-dominant applications) and achieves minimal
                 frames-per-second (FPS) loss by 0.9\% and 0.85\% (1.3\%
                 and 1.5\% in CPU-dominant applications) on average in
                 training and deployment sets, respectively, compared to
                 the default CPU and GPU governors, with negligible
                 overhead in execution time and power consumption on the
                 ODROID-XU3 platform.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Narayan:2018:MTR,
  author =       "Apurva Narayan and Greta Cutulenco and Yogi Joshi and
                 Sebastian Fischmeister",
  title =        "Mining Timed Regular Specifications from System
                 Traces",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "46:1--46:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3147660",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Temporal properties define the order of occurrence and
                 timing constraints on event occurrence. Such
                 specifications are important for safety-critical
                 real-time systems. We propose a framework for
                 automatically mining temporal properties that are in
                 the form of timed regular expressions (TREs) from
                 system traces. Using an abstract structure of the
                 property, the framework constructs a finite state
                 machine to serve as an acceptor. We analytically derive
                 speedup for the fragment and confirm the speedup using
                 empirical validation with synthetic traces. The
                 framework is evaluated on industrial-strength
                 safety-critical real-time applications using traces
                 with more than 1 million entries.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shoushtari:2018:SIS,
  author =       "Majid Shoushtari and Bryan Donyanavard and Luis Angel
                 D. Bathen and Nikil Dutt",
  title =        "{ShaVe-ICE}: Sharing Distributed Virtualized {SPMs} in
                 Many-Core Embedded Systems",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "47:1--47:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3157667",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Traditional approaches for managing
                 software-programmable memories (SPMs) do not support
                 sharing of distributed on-chip memory resources and,
                 consequently, miss the opportunity to better utilize
                 those memory resources. Managing on-chip memory
                 resources in many-core embedded systems with
                 distributed SPMs requires runtime support to share
                 memory resources between various threads with different
                 memory demands running concurrently. Runtime SPM
                 managers cannot rely on prior knowledge about the
                 dynamically changing mix of threads that will execute
                 and therefore should be designed in a way that enables
                 SPM allocations for any unpredictable mix of threads
                 contending for on-chip memory space. This article
                 proposes ShaVe-ICE, an operating-system-level solution,
                 along with hardware support, to virtualize and
                 ultimately share SPM resources across a many-core
                 embedded system to reduce the average memory latency.
                 We present a number of simple allocation policies to
                 improve performance and energy. Experimental results
                 show that sharing SPMs could reduce the average
                 execution time of the workload up to 19.5\% and reduce
                 the dynamic energy consumed in the memory subsystem up
                 to 14\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{He:2018:AFI,
  author =       "Zhijian He and Yao Chen and Zhaoyan Shen",
  title =        "Attitude Fusion of Inertial and Magnetic Sensor under
                 Different Magnetic Filed Distortions",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "48:1--48:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3157668",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "By virtue of gravity measurement from a handheld
                 inertial measurement unit (IMU) sensor, current indoor
                 attitude estimation algorithms can provide accurate
                 roll/pitch dimension angles. Acquisition of precise
                 heading is limited by the absence of accurate magnetic
                 reference. Consequently, initial stage magnetometer
                 calibration is deployed to alleviate this bottleneck in
                 attitude fusion. However, available algorithms tackle
                 magnetic distortion based on time-invariant
                 surroundings, casting the post-calibration magnetic
                 data into unchanged ellipsoid centered in the
                 calibration place. Consequently, inaccurate fusion
                 results are formulated in a more common case of random
                 walk in time-varying magnetic indoor environment. This
                 article proposes a new fusion algorithm from various
                 kinds of IMU sensors, namely gyroscope, accelerometer,
                 and magnetometer. Compared to state-of-the-art attitude
                 fusion approaches, this article addresses the indoor
                 time-varying magnetic perturbation problem in a
                 geometric view. We propose an extended Kalman
                 filter--based algorithm based on this detailed
                 geometric model to eliminate the position-dependent
                 effect of a compass sensor. Experimental data
                 demonstrate that, under different indoor magnetic
                 distortion environments, our proposed attitude fusion
                 algorithm has the maximum angle error of 2.02${}^\circ
                 $, outperforming 7.17${}^\circ $ of a
                 gradient-declining-based algorithm. Additionally, this
                 attitude fusion result is constructed in a low-cost
                 handheld arduino core--based IMU device, which can be
                 widely applied to embedded systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bhattacharjee:2018:CRM,
  author =       "Sukanta Bhattacharjee and Yi-Ling Chen and Juinn-Dar
                 Huang and Bhargab B. Bhattacharya",
  title =        "Concentration-Resilient Mixture Preparation with
                 Digital Microfluidic Lab-on-Chip",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "49:1--49:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3157094",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Sample preparation plays a crucial role in almost all
                 biochemical applications, since a predominant portion
                 of biochemical analysis time is associated with sample
                 collection, transportation, and preparation. Many
                 sample-preparation algorithms are proposed in the
                 literature that are suitable for execution on
                 programmable digital microfluidic (DMF) platforms. In
                 most of the existing DMF-based sample-preparation
                 algorithms, a fixed target ratio is provided as input,
                 and the corresponding mixing tree is generated as
                 output. However, in many biochemical applications,
                 target mixtures with exact component proportions may
                 not be needed. From a biochemical perspective, it may
                 be sufficient to prepare a mixture in which the input
                 reagents may lie within a range of concentration
                 factors. The choice of a particular valid ratio,
                 however, strongly impacts solution-preparation cost and
                 time. To address this problem, we propose a
                 concentration-resilient ratio-selection method from the
                 input ratio space so that the reactant cost is
                 minimized. We propose an integer linear
                 programming--based method that terminates very fast
                 while producing the optimum solution, considering both
                 uniform and weighted cost of reagents. Experimental
                 results reveal that the proposed method can be used
                 conveniently in tandem with several existing
                 sample-preparation algorithms for improving their
                 performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lin:2018:MCV,
  author =       "Shuoxin Lin and Jiahao Wu and Shuvra S.
                 Bhattacharyya",
  title =        "Memory-Constrained Vectorization and Scheduling of
                 Dataflow Graphs for Hybrid {CPU--GPU} Platforms",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "50:1--50:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3157669",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The increasing use of heterogeneous embedded systems
                 with multi-core CPUs and Graphics Processing Units
                 (GPUs) presents important challenges in effectively
                 exploiting pipeline, task, and data-level parallelism
                 to meet throughput requirements of digital signal
                 processing applications. Moreover, in the presence of
                 system-level memory constraints, hand optimization of
                 code to satisfy these requirements is inefficient and
                 error prone and can therefore, greatly slow down
                 development time or result in highly underutilized
                 processing resources. In this article, we present
                 vectorization and scheduling methods to effectively
                 exploit multiple forms of parallelism for throughput
                 optimization on hybrid CPU-GPU platforms, while
                 conforming to system-level memory constraints. The
                 methods operate on synchronous dataflow
                 representations, which are widely used in the design of
                 embedded systems for signal and information processing.
                 We show that our novel methods can significantly
                 improve system throughput compared to previous
                 vectorization and scheduling approaches under the same
                 memory constraints. In addition, we present a practical
                 case-study of applying our methods to significantly
                 improve the throughput of an orthogonal frequency
                 division multiplexing receiver system for wireless
                 communications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2018:HPH,
  author =       "Tian Huang and Yongxin Zhu and Yajun Ha and Xu Wang
                 and Meikang Qiu",
  title =        "A Hardware Pipeline with High Energy and Resource
                 Efficiency for {FMM} Acceleration",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "51:1--51:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3157670",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The fast multipole method (FMM) is a promising
                 mathematical technique that accelerates the calculation
                 of long-ranged forces in the large-sized n-body
                 problem. Existing implementations of the FMM on
                 general-purpose processors are energy and resource
                 inefficient. To mitigate these issues, we propose a
                 hardware pipeline that accelerates three key FMM steps.
                 The pipeline improves energy efficiency by exploiting
                 fine-granularity parallelism of the FMM. We reuse the
                 pipeline for different FMM steps to reduce resource
                 usage by 66\%. Compared to the state-of-the-art
                 implementations on CPUs and GPUs, our implementation
                 requires 15\% less energy and delivers 2.61 times more
                 floating-point operations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Qian:2018:ECD,
  author =       "Kun Qian and Chenshu Wu and Zheng Yang and Yunhao Liu
                 and Fugui He and Tianzhang Xing",
  title =        "Enabling Contactless Detection of Moving Humans with
                 Dynamic Speeds Using {CSI}",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "52:1--52:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3157677",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Device-free passive detection is an emerging
                 technology to detect whether there exist any moving
                 entities in the areas of interest without attaching any
                 device to them. It is an essential primitive for a
                 broad range of applications including intrusion
                 detection for safety precautions, patient monitoring in
                 hospitals, child and elder care at home, and so forth.
                 Despite the prevalent signal feature Received Signal
                 Strength (RSS), most robust and reliable solutions
                 resort to a finer-grained channel descriptor at the
                 physical layer, e.g., the Channel State Information
                 (CSI) in the 802.11n standard. Among a large body of
                 emerging techniques, however, few of them have explored
                 the full potential of CSI for human detection.
                 Moreover, space diversity supported by nowadays popular
                 multiantenna systems are not investigated to a
                 comparable extent as frequency diversity. In this
                 article, we propose a novel scheme for device-free
                 PAssive Detection of moving humans with dynamic Speed
                 (PADS). Both full information (amplitude and phase) of
                 CSI and space diversity across multiantennas in MIMO
                 systems are exploited to extract and shape sensitive
                 metrics for accurate and robust target detection. We
                 prototype PADS on commercial WiFi devices, and
                 experiment results in different scenarios demonstrate
                 that PADS achieves great performance improvement in
                 spite of dynamic human movements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "52",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Guo:2018:CSP,
  author =       "Danlu Guo and Mohamed Hassan and Rodolfo Pellizzoni
                 and Hiren Patel",
  title =        "A Comparative Study of Predictable {DRAM}
                 Controllers",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "53:1--53:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3158208",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recently, the research community has introduced
                 several predictable dynamic random-access memory (DRAM)
                 controller designs that provide improved worst-case
                 timing guarantees for real-time embedded systems. The
                 proposed controllers significantly differ in terms of
                 arbitration, configuration, and simulation environment,
                 making it difficult to assess the contribution of each
                 approach. To bridge this gap, this article provides the
                 first comprehensive evaluation of state-of-the-art
                 predictable DRAM controllers. We propose a
                 categorization of available controllers, and introduce
                 an analytical performance model based on worst-case
                 latency. We then conduct an extensive evaluation for
                 all state-of-the-art controllers based on a common
                 simulation platform, and discuss findings and
                 recommendations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "53",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mozaffari-Kermani:2018:ERE,
  author =       "Mehran Mozaffari-Kermani and Reza Azarderakhsh and
                 Ausmita Sarker and Amir Jalali",
  title =        "Efficient and Reliable Error Detection Architectures
                 of Hash-Counter-Hash Tweakable Enciphering Schemes",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "54:1--54:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3159173",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Through pseudorandom permutation, tweakable
                 enciphering schemes (TES) constitute block cipher modes
                 of operation which perform length-preserving
                 computations. The state-of-the-art research has focused
                 on different aspects of TES, including implementations
                 on hardware [field-programmable gate array
                 (FPGA)/application-specific integrated circuit (ASIC)] and
                 software (hard/soft-core microcontrollers) platforms,
                 algorithmic security, and applicability to sensitive,
                 security-constrained usage models. In this article, we
                 propose efficient approaches for protecting such
                 schemes against natural and malicious faults.
                 Specifically, noting that intelligent attackers do not
                 merely get confined to injecting multiple faults, one
                 major benchmark for the proposed schemes is evaluation
                 toward biased and burst fault models. We evaluate a
                 variant of TES, i.e., the Hash-Counter-Hash scheme,
                 which involves polynomial hashing as other variants are
                 either similar or do not constitute finite field
                 multiplication which, by far, is the most involved
                 operation in TES. In addition, we benchmark the
                 overhead and performance degradation on the ASIC
                 platform. The results of our error injection
                 simulations and ASIC implementations show the
                 suitability of the proposed approaches for a wide range
                 of applications including deeply embedded systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Real:2018:ADS,
  author =       "Maria M{\'e}ndez Real and Philipp Wehner and Vianney
                 Lapotre and Diana G{\"o}hringer and Guy Gogniat",
  title =        "Application Deployment Strategies for Spatial
                 Isolation on Many-Core Accelerators",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "55:1--55:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3168383",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Current cache Side-Channel Attacks (SCAs)
                 countermeasures have not been designed for many-core
                 architectures and need to be revisited in order to be
                 practical for these new technologies. Spatial isolation
                 of resources for sensitive applications has been
                 proposed taking advantage of the large number of
                 resources offered by these architectures. This solution
                 avoids cache sharing with sensitive processes.
                 Consequently, their cache activity cannot be monitored
                 and cache SCAs cannot be performed. This work focuses
                 on the implementation of this technique in order to
                 minimize the induced performance overhead. Different
                 strategies for the management of isolated secure zones
                 are implemented and compared.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "55",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sui:2018:LOP,
  author =       "Yulei Sui and Xiaokang Fan and Hao Zhou and Jingling
                 Xue",
  title =        "Loop-Oriented Pointer Analysis for Automatic {SIMD}
                 Vectorization",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "56:1--56:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3168364",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Compiler-based vectorization represents a promising
                 solution to automatically generate code that makes
                 efficient use of modern CPUs with SIMD extensions. Two
                 main auto-vectorization techniques, superword-level
                 parallelism vectorization (SLP) and loop-level
                 vectorization (LLV), require precise dependence
                 analysis on arrays and structs to vectorize isomorphic
                 scalar instructions (in the case of SLP) and reduce
                 dynamic dependence checks at runtime (in the case of
                 LLV). The alias analyses used in modern vectorizing
                 compilers are either intra-procedural (without tracking
                 inter-procedural data-flows) or inter-procedural (by
                 using field-insensitive models, which are too imprecise
                 in handling arrays and structs). This article proposes
                 an inter-procedural Loop-oriented Pointer Analysis for
                 C, called Lpa, for analyzing arrays and structs to
                 support aggressive SLP and LLV optimizations
                 effectively. Unlike field-insensitive solutions that
                 pre-allocate objects for each memory allocation site,
                 our approach uses a lazy memory model to generate
                 access-based location sets based on how structs and
                 arrays are accessed. Lpa can precisely analyze arrays
                 and nested aggregate structures to enable SIMD
                 optimizations for large programs. By separating the
                 location set generation as an independent concern from
                 the rest of the pointer analysis, Lpa is designed so
                 that existing points-to resolution algorithms (e.g.,
                 flow-insensitive and flow-sensitive pointer analysis)
                 can be reused easily. We have implemented Lpa fully in
                 the LLVM compiler infrastructure (version 3.8.0). We
                 evaluate Lpa by considering SLP and LLV, the two
                 classic vectorization techniques, on a set of 20 C and
                 Fortran CPU2000/2006 benchmarks. For SLP, Lpa
                 outperforms LLVM's BasicAA and ScevAA by discovering
                 139 and 273 more vectorizable basic blocks,
                 respectively, resulting in the best speedup of 2.95\%
                 for 173.applu. For LLV, LLVM introduces totally 551 and
                 652 static bound checks under BasicAA and ScevAA,
                 respectively. In contrast, Lpa has reduced these static
                 checks to 220, with an average of 15.7 checks per
                 benchmark, resulting in the best speedup of 7.23\% for
                 177.mesa.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2018:TES,
  author =       "Feng Li and Yanbing Yang and Zicheng Chi and Liya Zhao
                 and Yaowen Yang and Jun Luo",
  title =        "{Trinity}: Enabling Self-Sustaining {WSNs} Indoors
                 with Energy-Free Sensing and Networking",
  journal =      j-TECS,
  volume =       "17",
  number =       "2",
  pages =        "57:1--57:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3173039",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:34 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Whereas a lot of efforts have been put on energy
                 conservation in wireless sensor networks (WSNs), the
                 limited lifetime of these systems still hampers their
                 practical deployments. This situation is further
                 exacerbated indoors, as conventional energy harvesting
                 (e.g., solar) may not always work. To enable long-lived
                 indoor sensing, we report in this article a
                 self-sustaining sensing system that draws energy from
                 indoor environments, adapts its duty-cycle to the
                 harvested energy, and pays back the environment by
                 enhancing the awareness of the indoor microclimate
                 through an ``energy-free'' sensing. First of all, given
                 the pervasive operation of heating, ventilation, and
                 air conditioning (HVAC) systems indoors, our system
                 harvests energy from airflow introduced by the HVAC
                 systems to power each sensor node. Secondly, as the
                 harvested power is tiny, an extremely low but
                 synchronous duty-cycle has to be applied whereas the
                 system gets no energy surplus to support existing
                 synchronization schemes. So, we design two
                 complementary synchronization schemes that cost
                 virtually no energy. Finally, we exploit the feature of
                 our harvester to sense the airflow speed in an
                 energy-free manner. To our knowledge, this is the first
                 indoor wireless sensing system that encapsulates energy
                 harvesting, network operating, and sensing all
                 together.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2018:EUE,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: To Use or Not To? {Embedded} Systems for
                 Voting",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "58:1--58:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3206342",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Morse:2018:LAW,
  author =       "Jeremy Morse and Steve Kerrison and Kerstin Eder",
  title =        "On the Limitations of Analyzing Worst-Case Dynamic
                 Energy of Processing",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "59:1--59:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3173042",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article examines dynamic energy consumption
                 caused by data during software execution on deeply
                 embedded microprocessors, which can be significant on
                 some devices. In worst-case energy consumption
                 analysis, energy models are used to find the most
                 costly execution path. Taking each instruction's
                 worst-case energy produces a safe but overly
                 pessimistic upper bound. Algorithms for safe and tight
                 bounds would be desirable. We show that finding exact
                 worst-case energy is NP-hard, and that tight bounds
                 cannot be approximated with guaranteed safety. We
                 conclude that any energy model targeting tightness must
                 either sacrifice safety or accept overapproximation
                 proportional to data-dependent energy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seo:2018:CIA,
  author =       "Hwajeong Seo and Ilwoong Jeong and Jungkeun Lee and
                 Woo-Hwan Kim",
  title =        "Compact Implementations of {ARX}-Based Block Ciphers
                 on {IoT} Processors",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "60:1--60:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3173455",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we present implementations for
                 Addition, Rotation, and eXclusive-or (ARX)-based block
                 ciphers, including LEA and HIGHT, on IoT devices,
                 including 8-bit AVR, 16-bit MSP, 32-bit ARM, and 32-bit
                 ARM-NEON processors. We optimized 32-/8-bitwise ARX
                 operations for LEA and HIGHT block ciphers by
                 considering variations in word size, the number of
                 general purpose registers, and the instruction set of
                 the target IoT devices. Finally, we achieved the most
                 compact implementations of LEA and HIGHT block ciphers.
                 The implementations were fairly evaluated through the
                 Fair Evaluation of Lightweight Cryptographic Systems
                 framework, and implementations won the competitions in
                 the first and the second rounds.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "60",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hong:2018:ISP,
  author =       "Ding-Yong Hong and Yu-Ping Liu and Sheng-Yu Fu and
                 Jan-Jan Wu and Wei-Chung Hsu",
  title =        "Improving {SIMD} Parallelism via Dynamic Binary
                 Translation",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "61:1--61:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3173456",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent trends in SIMD architecture have tended toward
                 longer vector lengths, and more enhanced SIMD features
                 have been introduced in newer vector instruction sets.
                 However, legacy or proprietary applications compiled
                 with short-SIMD ISA cannot benefit from the long-SIMD
                 architecture that supports improved parallelism and
                 enhanced vector primitives, resulting in only a small
                 fraction of potential peak performance. This article
                 presents a dynamic binary translation technique that
                 enables short-SIMD binaries to exploit benefits of new
                 SIMD architectures by rewriting short-SIMD loop code.
                 We propose a general approach that translates loops
                 consisting of short-SIMD instructions to
                 machine-independent IR, conducts SIMD loop
                 transformation/optimization at this IR level, and
                 finally translates to long-SIMD instructions. Two
                 solutions are presented to enforce SIMD load/store
                 alignment, one for the problem caused by the binary
                 translator's internal translation condition and one
                 general approach using dynamic loop peeling
                 optimization. Benchmark results show that average
                 speedups of $ 1.51 \times $ and $ 2.48 \times $ are
                 achieved for an ARM NEON to x86 AVX2 and x86 AVX-512
                 loop transformation, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhang:2018:PEP,
  author =       "Jiutian Zhang and Yuhang Liu and Haifeng Li and
                 Xiaojing Zhu and Mingyu Chen",
  title =        "{PTAT}: an Efficient and Precise Tool for Tracing and
                 Profiling Detailed {TLB} Misses",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "62:1--62:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3182174",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As the memory access footprints of applications in
                 areas like data analytics increase, the latency
                 overhead of translation lookaside buffer (TLB) misses
                 increases. Thus, the efficiency of TLB becomes
                 increasingly critical for overall system performance.
                 Analyzing TLB miss traces is useful for hardware
                 architecture design and software application
                 optimization. Utilizing cycle-accurate simulators or
                 instrumentation tools is very time-consuming and/or
                 inaccurate for tracing and profiling TLB misses. In
                 this article, we propose an efficient and precise tool
                 to collect and profile last-level TLB misses. This tool
                 utilizes a novel software method called Page Table
                 Access Tracing (PTAT), storing last-level page table
                 entries of certain workload processes into a reserved
                 uncached memory region. Therefore, each last-level TLB
                 miss incurred by user process corresponds to one
                 uncached page table access to main memory, which can be
                 captured and recorded by a hardware memory bus monitor.
                 The detected information is then dumped into offline
                 storage. In this manner, full TLB miss traces are
                 collected and can be analyzed flexibly. Compared to
                 previous software-based methods, this method achieves
                 higher performance. Experiments show that, compared
                 with a state-of-the-art kernel instrumentation method
                 (BadgerTrap), which lacks complete dumping trace
                 function, the speedup is still up to 3.88-fold for
                 memory-intensive benchmarks. Due to the improved
                 efficiency and completeness of tracing, case studies
                 validate that more flexible profiling can be conducted,
                 which is of great significance for TLB performance
                 optimization. The accuracy of PTAT is verified by both
                 dedicated sequence and performance counters.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "62",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hosseinabady:2018:DEM,
  author =       "Mohammad Hosseinabady and Jose Luis Nunez-Yanez",
  title =        "Dynamic Energy Management of {FPGA} Accelerators in
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "63:1--63:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3182172",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we investigate how to utilise a
                 Field-Programmable Gate Array (FPGA) in an embedded
                 system to save energy. For this purpose, we study the
                 energy efficiency of a hybrid FPGA-CPU device that can
                 switch task execution between hardware and software
                 with a focus on periodic tasks. To increase the
                 applicability of this task switching, we also consider
                 the voltage and frequency scaling (VFS) applied to the
                 FPGA to reduce the system energy consumption. We show
                 that in some cases, if the task's period is higher than
                 a specific level, the FPGA accelerator cannot reduce
                 the energy consumption associated to the task and the
                 software version is the most energy efficient option.
                 We have applied the proposed techniques to a robot map
                 creation algorithm as a case study which shows up to
                 38\% energy reduction compared to the FPGA
                 implementation. Overall, experimental results show up
                 to 48\% energy reduction by applying the proposed
                 techniques at runtime on 13 individual tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "63",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2018:OND,
  author =       "Hyeonggyu Kim and Minho Ju and Soontae Kim",
  title =        "{OnNetwork+}: Network Delay-Aware Management for
                 Mobile Systems",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "64:1--64:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3182171",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Network errors such as packet losses consume large
                 amounts of energy. We analyzed the reason for this
                 through measurements using the latest smartphones and
                 full-system simulation. We found that on packet losses
                 the smartphones maintain high frequencies for CPU
                 without doing useful work. To address this problem, we
                 propose a method for reducing the energy consumption by
                 lowering the performance level by exploiting a dynamic
                 voltage and frequency scaling mechanism when long
                 network delays are expected. According to our
                 experiments, our method reduces the total energy
                 consumption of web browsing on two different
                 smartphones by up to 10.0\% and 11.5\%, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tsoutsouras:2018:HDR,
  author =       "Vasileios Tsoutsouras and Iraklis Anagnostopoulos and
                 Dimosthenis Masouros and Dimitrios Soudris",
  title =        "A Hierarchical Distributed Runtime Resource Management
                 Scheme for {NoC}-Based Many-Cores",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "65:1--65:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3182173",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As technology constantly strengthens its presence in
                 all aspects of human life, computing systems integrate
                 a high number of processing cores, whereas applications
                 become more complex and greedy for computational
                 resources. Inevitably, this high increase in processing
                 elements combined with the unpredictable resource
                 requirements of executed applications at design time
                 impose new design constraints to resource management of
                 many-core systems, turning the distributed
                 functionality into a necessity. In this work, we
                 present a distributed runtime resource management
                 framework for many-core systems utilizing a
                 network-on-chip (NoC) infrastructure. Specifically, we
                 couple the concept of distributed management with
                 parallel applications by assigning different roles to
                 the available computing resources. The presented design
                 is based on the idea of local controllers and managers,
                 whereas an on-chip intercommunication scheme ensures
                 decision distribution. The evaluation of the proposed
                 framework was performed on an Intel Single-Chip Cloud
                 Computer, an actual NoC-based, many-core system.
                 Experimental results show that the proposed scheme
                 manages to allocate resources efficiently at runtime,
                 leading to gains of up to 30\% in application execution
                 latency compared to relevant state-of-the-art
                 distributed resource management frameworks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "65",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seo:2018:CSI,
  author =       "Hwajeong Seo",
  title =        "Compact Software Implementation of Public-Key
                 Cryptography on {MSP430X}",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "66:1--66:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3190855",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "On the low-end embedded processors, the
                 implementations of Elliptic Curve Cryptography (ECC)
                 are considered to be a challenging task due to the
                 limited computation power and storage of the low-end
                 embedded processors. Particularly, the multi-precision
                 multiplication and squaring operations are the most
                 expensive operations for ECC implementations. In order
                 to enhance the performance, many works presented
                 efficient multiplication and squaring routines on the
                 target devices. Recent works show that 128-bit security
                 level ECC is available within a second and this is
                 practically fast enough for IoT services. However,
                 previous approaches missed the other important storage
                 issues (i.e., program size, ROM). Considering that the
                 embedded processors only have a few KB ROM, we need to
                 pay attention to the compact ROM size with reasonable
                 performance. In this article, we present very compact
                 and generic implementations of multiplication and
                 squaring operations on the 16-bit MSP430X processors
                 for the ECC. The implementations utilize the new 32-bit
                 multiplier and advanced multiplication and squaring
                 routines. Since the proposed routines are generic, the
                 arbitrary length of operand is available with
                 high-speed and small code size. With proposed
                 multiplication and squaring routines, we implemented
                 Curve25519 on the MSP430X processors. The scalar
                 multiplication is performed within 6,666,895 clock
                 cycles and 4,054 bytes. Compared with previous works
                 based on the speed-optimized version, our
                 memory-efficient version reduces the code size by
                 59.8\%, sacrificing the execution timing by 20.5\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yassin:2018:AAC,
  author =       "Yahya H. Yassin and Francky Catthoor and Fabian
                 Kloosterman and Jyh-Jang Sun and Jo{\~a}o Couto and Per
                 Gunnar Kjeldsberg and Nick {Van Helleputte}",
  title =        "Algorithm\slash Architecture Co-optimisation Technique
                 for Automatic Data Reduction of Wireless Read-Out in
                 High-Density Electrode Arrays",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "67:1--67:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3190854",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "High-density electrode arrays used to read out neural
                 activity will soon surpass the limits of the amount of
                 data that can be transferred within reasonable energy
                 budgets. This is true for wired brain implants when the
                 required bandwidth becomes very high, and even more so
                 for untethered brain implants that require wireless
                 transmission of data. We propose an energy-efficient
                 spike data extraction solution for high-density
                 electrode arrays, capable of reducing the data to be
                 transferred by over 85\%. We combine temporal and
                 spatial spike data analysis with low implementation
                 complexity, where amplitude thresholds are used to
                 detect spikes and the spatial location of the
                 electrodes is used to extract potentially useful
                 sub-threshold data on neighboring electrodes. We tested
                 our method against a state-of-the-art spike detection
                 algorithm, with prohibitively high implementation
                 complexity, and found that the majority of spikes are
                 extracted reliably. We obtain further improved quality
                 results when ignoring very small spikes below 30\% of
                 the voltage thresholds, resulting in 91\% accuracy. Our
                 approach uses digital logic and is therefore scalable
                 with an increasing number of electrodes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hammari:2018:RPD,
  author =       "Elena Hammari and Per Gunnar Kjeldsberg and Francky
                 Catthoor",
  title =        "Runtime Precomputation of Data-Dependent Parameters in
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "68:1--68:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3191311",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In many modern embedded systems, the available
                 resources (e.g., CPU clock cycles, memory, and energy)
                 are consumed nonuniformly while the system is under
                 exploitation. Typically, the resource requirements in
                 the system change with different input data that the
                 system processes. These data trigger different parts of
                 the embedded software, resulting in different
                 operations executed that require different hardware
                 platform resources to be used. A significant research
                 effort has been dedicated to develop mechanisms for
                 runtime resource management (e.g., branch prediction
                 for pipelined processors, prefetching of data from main
                 memory to cache, and scenario-based design
                 methodologies). All these techniques rely on the
                 availability of information at runtime about upcoming
                 changes in resource requirements. In this article, we
                 propose a method for detecting upcoming resource
                 changes based on preliminary calculation of software
                 variables that have the most dynamic impact on resource
                 requirements in the system. We apply the method on a
                 modified real-life biomedical algorithm with real input
                 data and estimate a 40\% energy reduction as compared
                 to static DVFS scheduling. Comparing to dynamic DVFS
                 scheduling, an 18\% energy reduction is demonstrated.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yoon:2018:SAF,
  author =       "Su-Kyung Yoon and Jitae Yun and Jung-Geun Kim and
                 Shin-Dug Kim",
  title =        "Self-Adaptive Filtering Algorithm with {PCM}-Based
                 Memory Storage System",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "69:1--69:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3190856",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article proposes a new phase change memory- (PCM)
                 based memory storage architecture with associated
                 self-adaptive data filtering for various embedded
                 devices to support energy efficiency as well as high
                 computing power. In this approach, PCM-based memory
                 storage can be used as working memory and mass storage
                 layers simultaneously, and a self-adaptive data
                 filtering module composed of small DRAM dual buffers
                 was designed to improve unfavorable PCM features, such
                 as asymmetric read/write access latencies and limited
                 endurance and enhance spatial/temporal localities. In
                 particular, the self-adaptive data filtering algorithm
                 enhances data reusability by screening potentially high
                 reusable data and predicting adequate lifetime of those
                 data depending on current victim time decision value.
                 We also propose the possibility that a small amount of
                 DRAM buffer is embedded into mobile processors, keeping
                 this as small as possible for cost effectiveness and
                 energy efficiency. Experimental results show that by
                 exploiting a small amount of DRAM space for dual
                 buffers and using the self-adaptive filtering algorithm
                 to manage them, the proposed system can reduce
                 execution time by a factor of 1.9 compared to the
                 unified conventional model with the same DRAM capacity
                 and can be considered comparable to 1.5$ \times $ DRAM
                 capacity.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Amanollahi:2018:ERD,
  author =       "Saba Amanollahi and Ghassem Jaberipur",
  title =        "Extended Redundant-Digit Instruction Set for
                 Energy-Efficient Processors",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "70:1--70:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3202664",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The impact of extending the instruction set
                 architecture (ISA) of a conventional binary processor
                 by a set of redundant-digit arithmetic instructions is
                 studied. Selected binary arithmetic instructions within
                 a given code sequence are replaced with appropriate
                 redundant-digit ones. The selection criteria is so
                 enforced to lead to overall reduction of execution
                 energy and energy-delay product (EDP). A special branch
                 and bound algorithm is devised to modify the dataflow
                 graph (DFG) to a new one that takes advantage of the
                 extended redundant-digit instruction set. The DFG is
                 obtained, via an in-house tool, from the intermediate
                 code representation that is normally produced by the
                 utilized compiler. The required redundant-digit
                 arithmetic operations (including a multiplier, a
                 multiply accumulator, and three- to four-operand
                 redundant-digit adders specially designed for this
                 work) have been synthesized on 45nm NanGate technology
                 by a Synopsys Design Compiler. To evaluate the impact
                 of the proposed ISA augmentation on actual code
                 execution, the simulation and evaluation platform of
                 our choice is an MIPS processor whose ISA is extended
                 by the proposed redundant-digit instructions. Several
                 digital signal processing benchmarks are utilized as
                 the source of the baseline MIPS codes, which are
                 converted (via the aforementioned algorithm) to the
                 equivalent mixed binary/redundant-digit codes. Our
                 experiments, as such, show up to 26\% energy and 44\%
                 EDP savings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Andersson:2018:SAT,
  author =       "Bj{\"o}rn Andersson and Hyoseung Kim and Dionisio {De
                 Niz} and Mark Klein and Ragunathan (Raj) Rajkumar and
                 John Lehoczky",
  title =        "Schedulability Analysis of Tasks with
                 Corunner-Dependent Execution Times",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "71:1--71:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3203407",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Consider fixed-priority preemptive partitioned
                 scheduling of constrained-deadline sporadic tasks on a
                 multiprocessor. A task generates a sequence of jobs and
                 each job has a deadline that must be met. Assume tasks
                 have Corunner-dependent execution times; i.e., the
                 execution time of a job J depends on the set of jobs
                 that happen to execute (on other processors) at
                 instants when J executes. We present a model that
                 describes Corunner-dependent execution times. For this
                 model, we show that exact schedulability testing is
                 co-NP-hard in the strong sense. Facing this complexity,
                 we present a sufficient schedulability test, which has
                 pseudo-polynomial-time complexity if the number of
                 processors is fixed. We ran experiments with synthetic
                 software benchmarks on a quad-core Intel multicore
                 processor with the Linux/RK operating system and found
                 that for each task, its maximum measured response time
                 was bounded by the upper bound computed by our
                 theory.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vasilios:2018:CSC,
  author =       "Vasilios Kelefouras and Georgios Keramidas and
                 Nikolaos Voros",
  title =        "Combining Software Cache Partitioning and Loop Tiling
                 for Effective Shared Cache Management",
  journal =      j-TECS,
  volume =       "17",
  number =       "3",
  pages =        "72:1--72:??",
  month =        jun,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3202663",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:35 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "One of the biggest challenges in multicore platforms
                 is shared cache management, especially for
                 data-dominant applications. Two commonly used
                 approaches for increasing shared cache utilization are
                 cache partitioning and loop tiling. However,
                 state-of-the-art compilers lack efficient cache
                 partitioning and loop tiling methods for two reasons.
                 First, cache partitioning and loop tiling are strongly
                 coupled together, and thus addressing them separately
                 is simply not effective. Second, cache partitioning and
                 loop tiling must be tailored to the target shared cache
                 architecture details and the memory characteristics of
                 the corunning workloads. To the best of our knowledge,
                 this is the first time that a methodology provides (1)
                 a theoretical foundation in the above-mentioned cache
                 management mechanisms and (2) a unified framework to
                 orchestrate these two mechanisms in tandem (not
                 separately). Our approach manages to lower the number
                 of main memory accesses by an order of magnitude
                 keeping at the same time the number of
                 arithmetic/addressing instructions to a minimal level.
                 We motivate this work by showcasing that cache
                 partitioning, loop tiling, data array layouts, shared
                 cache architecture details (i.e., cache size and
                 associativity), and the memory reuse patterns of the
                 executing tasks must be addressed together as one
                 problem, when a (near)-optimal solution is requested.
                 To this end, we present a search space exploration
                 analysis where our proposal is able to offer a vast
                 deduction in the required search space.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2018:EEC,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Early Career Researchers in Embedded
                 Computing",
  journal =      j-TECS,
  volume =       "17",
  number =       "4",
  pages =        "73:1--73:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3241724",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "73",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vatanparvar:2018:DAB,
  author =       "Korosh Vatanparvar and Mohammad Abdullah {Al
                 Faruque}",
  title =        "Design and Analysis of Battery-Aware Automotive
                 Climate Control for Electric Vehicles",
  journal =      j-TECS,
  volume =       "17",
  number =       "4",
  pages =        "74:1--74:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3203408",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Electric Vehicles (EV) as a zero-emission means of
                 transportation encounter challenges in battery design
                 that cause a range anxieties for the drivers. Besides
                 the electric motor, the Heating, Ventilation, and Air
                 Conditioning (HVAC) system is another major contributor
                 to the power consumption that may influence the EV
                 battery lifetime and driving range. In the
                 state-of-the-art methodologies for battery management
                 systems, the battery performance is monitored and
                 improved. While in the automotive climate control, the
                 passenger's thermal comfort is the main objective.
                 Hence, the influence of the HVAC power on the battery
                 behavior for the purpose of jointly optimized battery
                 management and climate control has not been considered.
                 In this article, we propose an automotive climate
                 control methodology that is aware of the battery
                 behavior and performance, while maintaining the
                 passenger's thermal comfort. In our methodology,
                 battery parameters and cabin temperature are modeled
                 and estimated, and the HVAC utilization is optimized
                 and adjusted with respect to the electric motor and
                 HVAC power requests. Therefore, the battery stress
                 reduces, while the cabin temperature is maintained by
                 predicting and optimizing the system states in the
                 near-future. We have implemented our methodology and
                 compared its performance to the state-of-the-art in
                 terms of battery lifetime improvement and energy
                 consumption reduction. We have also conducted
                 experiments and analyses to explore multiple control
                 window sizes, drive profiles, ambient temperatures, and
                 modeling error rates in the methodology. It is shown
                 that our battery-aware climate control can extend the
                 battery lifetime by up to 13.2\% and reduce the energy
                 consumption by up to 14.4\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "74",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pan:2018:MAC,
  author =       "Wen Pan and Tao Xie",
  title =        "A Mirroring-Assisted Channel-{RAID5} {SSD} for Mobile
                 Applications",
  journal =      j-TECS,
  volume =       "17",
  number =       "4",
  pages =        "75:1--75:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3209625",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Simply applying an existing redundant array of
                 independent disks (RAID) technique to enhance data
                 reliability within a single solid-state drive for
                 safety-critical mobile applications significantly
                 degrades performance. In this article, we first propose
                 a new RAID5 architecture called channel-RAID5 with
                 mirroring (CR5M) to alleviate the performance
                 degradation problem. Next, an associated data
                 reconstruction strategy called mirroring-assisted
                 channel-level reconstruction (MCR) is developed to
                 further shrink the window of vulnerability.
                 Experimental results demonstrate that compared with
                 channel-RAID5 (CR5), CR5M improves performance up to
                 40.2\%. Compared with disk-oriented reconstruction, a
                 traditional data reconstruction scheme, MCR on average
                 improves data recovery speed by 7.5\% while delivering
                 a similar performance during reconstruction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "75",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Omar:2018:DRH,
  author =       "Hamza Omar and Qingchuan Shi and Masab Ahmad and Halit
                 Dogan and Omer Khan",
  title =        "Declarative Resilience: a Holistic Soft-Error
                 Resilient Multicore Architecture that Trades off
                 Program Accuracy for Efficiency",
  journal =      j-TECS,
  volume =       "17",
  number =       "4",
  pages =        "76:1--76:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3210559",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "To protect multicores from soft-error perturbations,
                 research has explored various resiliency schemes that
                 provide high soft-error coverage. However, these
                 schemes incur high performance and energy overheads. We
                 observe that not all soft-error perturbations affect
                 program correctness, and some soft-errors only affect
                 program accuracy, i.e., the program completes with
                 certain acceptable deviations from error free outcome.
                 Thus, it is practical to improve processor efficiency
                 by trading off resiliency overheads with program
                 accuracy. This article proposes the idea of declarative
                 resilience that selectively applies strong resiliency
                 schemes for code regions that are crucial for program
                 correctness (crucial code) and lightweight resiliency
                 for code regions that are susceptible to program
                 accuracy deviations as a result of soft-errors
                 (non-crucial code). At the application level, crucial
                 and non-crucial code is identified based on its impact
                 on the program outcome. A cross-layer architecture
                 enables efficient resilience along with holistic
                 soft-error coverage. Only program accuracy is
                 compromised in the worst-case scenario of a soft-error
                 strike during non-crucial code execution. For a set of
                 machine-learning and graph analytic benchmarks,
                 declarative resilience reduces performance overhead
                 over a state-of-the-art system that applies strong
                 resiliency for all program code regions from $ \approx
                 1.43 \times $ to $ \approx 1.2 \times $.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "76",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2018:SLL,
  author =       "Guan Wang and Chuanqi Zang and Lei Ju and Mengying
                 Zhao and Xiaojun Cai and Zhiping Jia",
  title =        "Shared Last-Level Cache Management and Memory
                 Scheduling for {GPGPUs} with Hybrid Main Memory",
  journal =      j-TECS,
  volume =       "17",
  number =       "4",
  pages =        "77:1--77:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3230643",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Memory intensive workloads become increasingly popular
                 on general purpose graphics processing units (GPGPUs),
                 and impose great challenges on the GPGPU memory
                 subsystem design. On the other hand, with the recent
                 development of non-volatile memory (NVM) technologies,
                 hybrid memory combining both DRAM and NVM achieves high
                 performance, low power, and high density
                 simultaneously, which provides a promising main memory
                 design for GPGPUs. In this article, we explore the
                 shared last-level cache management for GPGPUs with
                 consideration of the underlying hybrid main memory. To
                 improve the overall memory subsystem performance, we
                 exploit the characteristics of both the asymmetric
                 read/write latency of the hybrid main memory
                 architecture, as well as the memory coalescing feature
                 of GPGPUs. In particular, to reduce the average cost of
                 L2 cache misses, we prioritize cache blocks from DRAM
                 or NVM based on observations that operations to NVM
                 part of main memory have a large impact on the system
                 performance. Furthermore, the cache management scheme
                 also integrates the GPU memory coalescing and cache
                 bypassing techniques to improve the overall system
                 performance. To minimize the impact of memory
                 divergence behaviors among simultaneously executed
                 groups of threads, we propose a hybrid main memory and
                 warp aware memory scheduling mechanism for GPGPUs.
                 Experimental results show that in the context of a
                 hybrid main memory system, our proposed L2 cache
                 management policy and memory scheduling mechanism
                 improve performance by 15.69\% on average for memory
                 intensive benchmarks, whereas the maximum gain can be
                 up to 29\% and achieve an average memory subsystem
                 energy reduction of 21.27\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "77",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liang:2018:DFM,
  author =       "Xiaoxuan Liang and Zhangqin Huang and Shengqi Yang and
                 Lanxin Qiu",
  title =        "Device-Free Motion \& Trajectory Detection via
                 {RFID}",
  journal =      j-TECS,
  volume =       "17",
  number =       "4",
  pages =        "78:1--78:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3230644",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Compared with traditional methods that employ inertial
                 sensors or wireless sensors, device-free approaches do
                 not require that people carry devices, and they are
                 considered a useful technique for indoor navigation and
                 posture recognition. However, few existing methods can
                 detect the trajectory and movements of humans at the
                 same time. In this study, we propose a scheme called
                 PADAR for addressing these two problems simultaneously
                 by using passive radio frequency identification (RFID)
                 tags but without attaching them to the human body. The
                 idea is based on the principle of radio tomographic
                 imaging, where the variance in a tag's backscattered
                 radio frequency signal strength is influenced by human
                 movement. We integrated a commodity off-the-shelf RFID
                 reader with a two-dimensional phased array antenna and
                 a matrix of passive tags to evaluate the performance of
                 our scheme. We conducted experiments in a simulated
                 indoor environment. The experimental results showed
                 that PADAR achieved an accuracy of over 70\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ji:2018:ACP,
  author =       "Kecheng Ji and Ming Ling and Longxing Shi and Jianping
                 Pan",
  title =        "An Analytical Cache Performance Evaluation Framework
                 for Embedded Out-of-Order Processors Using Software
                 Characteristics",
  journal =      j-TECS,
  volume =       "17",
  number =       "4",
  pages =        "79:1--79:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3233182",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Utilizing analytical models to evaluate proposals or
                 provide guidance in high-level architecture decisions
                 is been becoming more and more attractive. A certain
                 number of methods have emerged regarding cache
                 behaviors and quantified insights in the last decade,
                 such as the stack distance theory and the memory level
                 parallelism (MLP) estimations. However, prior research
                 normally oversimplified the factors that need to be
                 considered in out-of-order processors, such as the
                 effects triggered by reordered memory instructions, and
                 multiple dependences among memory instructions, along
                 with the merged accesses in the same MSHR entry. These
                 ignored influences actually result in low and unstable
                 precisions of recent analytical models. By quantifying
                 the aforementioned effects, this article proposes a
                 cache performance evaluation framework equipped with
                 three analytical models, which can more accurately
                 predict cache misses, MLPs, and the average cache miss
                 service time, respectively. Similar to prior studies,
                 these analytical models are all fed with profiled
                 software characteristics in which case the architecture
                 evaluation process can be accelerated significantly
                 when compared with cycle-accurate simulations. We
                 evaluate the accuracy of proposed models compared with
                 gem5 cycle-accurate simulations with 16 benchmarks
                 chosen from Mobybench Suite 2.0, Mibench 1.0, and
                 Mediabench II. The average root mean square errors for
                 predicting cache misses, MLPs, and the average cache
                 miss service time are around 4\%, 5\%, and 8\%,
                 respectively. Meanwhile, the average error of
                 predicting the stall time due to cache misses by our
                 framework is as low as 8\%. The whole cache performance
                 estimation can be sped by about 15 times versus gem5
                 cycle-accurate simulations and 4 times when compared
                 with recent studies. Furthermore, we have shown and
                 studied the insights between different performance
                 metrics and the reorder buffer sizes by using our
                 models. As an application case of the framework, we
                 also demonstrate how to use our framework combined with
                 McPAT to find out Pareto optimal configurations for
                 cache design space explorations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "79",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ara:2018:SAM,
  author =       "Hadi Alizadeh Ara and Amir Behrouzian and Martijn
                 Hendriks and Marc Geilen and Dip Goswami and Twan
                 Basten",
  title =        "Scalable Analysis for Multi-Scale Dataflow Models",
  journal =      j-TECS,
  volume =       "17",
  number =       "4",
  pages =        "80:1--80:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3233183",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multi-scale dataflow models have actors acting at
                 multiple granularity levels, e.g., a dataflow model of
                 a video processing application with operations on
                 frame, line, and pixel level. The state of the art
                 timing analysis methods for both static and dynamic
                 dataflow types aggregate the behaviours across all
                 granularity levels into one, often large iteration,
                 which is repeated without exploiting the structure
                 within such an iteration. This poses scalability issues
                 to dataflow analysis, because behaviour of the large
                 iteration is analysed by some form of simulation that
                 involves a large number of actor firings. We take a
                 fresh perspective of what is happening inside the large
                 iteration. We take advantage of the fact that the
                 iteration is a sequence of smaller behaviours, each
                 captured in a scenario, that are typically repeated
                 many times. We use the (max,+) linear model of dataflow
                 to represent each of the scenarios with a matrix. This
                 allows a compositional worst-case throughput analysis
                 of the repeated scenarios by raising the matrices to
                 the power of the number of repetitions, which scales
                 logarithmically with the number of repetitions, whereas
                 the existing throughput analysis scales linearly. We
                 moreover provide the first exact worst-case latency
                 analysis for scenario-aware dataflow. This
                 compositional latency analysis also scales
                 logarithmically when applied to multi-scale dataflow
                 models. We apply our new throughput and latency
                 analysis to several realistic applications. The results
                 confirm that our approach provides a fast and accurate
                 analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "80",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Altawy:2018:SLT,
  author =       "Riham Altawy and Raghvendra Rohit and Morgan He and
                 Kalikinkar Mandal and Gangqiang Yang and Guang Gong",
  title =        "{SLISCP-light}: Towards Hardware Optimized
                 Sponge-specific Cryptographic Permutations",
  journal =      j-TECS,
  volume =       "17",
  number =       "4",
  pages =        "81:1--81:??",
  month =        aug,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3233245",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The emerging areas in which highly resource
                 constrained devices are interacting wirelessly to
                 accomplish tasks have led manufacturers to embed
                 communication systems in them. Tiny low-end devices
                 such as sensor networks nodes and Radio Frequency
                 Identification (RFID) tags are of particular importance
                 due to their vulnerability to security attacks, which
                 makes protecting their communication privacy and
                 authenticity an essential matter. In this work, we
                 present a lightweight do-it-all cryptographic design
                 that offers the basic underlying functionalities to
                 secure embedded communication systems in tiny devices.
                 Specifically, we revisit the design approach of the
                 sLiSCP family of lightweight cryptographic
                 permutations, which was proposed in SAC 2017. sLiSCP is
                 designed to be used in a unified duplex sponge
                 construction to provide minimal overhead for multiple
                 cryptographic functionalities within one hardware
                 design. The design of sLiSCP follows a 4-subblock
                 Type-2 Generalized Feistel-like Structure (GFS) with
                 unkeyed round-reduced Simeck as the round function,
                 which are extremely efficient building blocks in terms
                 of their hardware area requirements. In SLISCP-light,
                 we tweak the GFS design and turn it into an elegant
                 Partial Substitution-Permutation Network construction,
                 which further reduces the hardware areas of the SLISCP
                 permutations by around 16\% of their original values.
                 The new design also enhances the bit diffusion and
                 algebraic properties of the permutations and enables us
                 to reduce the number of steps, thus achieving a better
                 throughput in both the hashing and authentication
                 modes. We perform a thorough security analysis of the
                 new design with respect to its diffusion, differential
                 and linear, and algebraic properties. For
                 SLISCP-light-192, we report parallel implementation
                 hardware areas of 1,820 (respectively, 1,892)GE in CMOS
                 65 nm (respectively, 130 nm) ASIC. The areas for
                 SLISCP-light-256 are 2,397 and 2,500GE in CMOS 65 nm
                 and 130 nm ASIC, respectively. Overall, the unified
                 duplex sponge mode of SLISCP-light-192, which provides
                 (authenticated) encryption and hashing functionalities,
                 satisfies the area (1,958GE), power (3.97 $ \mu $W),
                 and throughput (44.4kbps) requirements of passive RFID
                 tags.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "81",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2018:ENA,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Need for Artifact Verified Articles in
                 {{\booktitle{ACM Transactions}}}",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "82:1--82:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3282437",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3282437",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "82",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kalayappan:2018:PAH,
  author =       "Rajshekar Kalayappan and Smruti R. Sarangi",
  title =        "Providing Accountability in Heterogeneous
                 Systems-on-Chip",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "83:1--83:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3241048",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3241048",
  abstract =     "When modern systems-on-chip (SoCs), containing designs
                 from different organizations, miscompute or
                 underperform in the field, discerning the responsible
                 component is a non-trivial task. A perfectly
                 accountable system is one in which the on-chip
                 component at fault is always unambiguously detected.
                 The achievement of accountability can be greatly aided
                 by the collection of runtime information that captures
                 the events in the system that led to the error. Such
                 information collection must be fair and impartial to
                 all parties. In this article, we prove that logging
                 messages communicated between components from different
                 organizations is sufficient to provide accountability,
                 provided the logs are authentic. We then construct a
                 solution based on this premise, with an on-chip trusted
                 auditing system to authenticate the logs. We present a
                 thorough design of the auditing system, and demonstrate
                 that its performance overhead is a mere 0.49\%, and its
                 area overhead is a mere 0.194\% (in a heterogeneous 48
                 core, 400 mm$^2$ chip). We also demonstrate the
                 viability of this solution using three representative
                 bugs found in popular commercial SoCs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "83",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bhuiyan:2018:EER,
  author =       "Ashikahmed Bhuiyan and Zhishan Guo and Abusayeed
                 Saifullah and Nan Guan and Haoyi Xiong",
  title =        "Energy-Efficient Real-Time Scheduling of {DAG} Tasks",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "84:1--84:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3241049",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3241049",
  abstract =     "This work studies energy-aware real-time scheduling of
                 a set of sporadic Directed Acyclic Graph (DAG) tasks
                 with implicit deadlines. While meeting all real-time
                 constraints, we try to identify the best task
                 allocation and execution pattern such that the average
                 power consumption of the whole platform is minimized.
                 To our knowledge, this is the first work that addresses
                 the power consumption issue in scheduling multiple DAG
                 tasks on multi-cores and allows intra-task processor
                 sharing. First, we adapt the decomposition-based
                 framework for federated scheduling and propose an
                 energy-sub-optimal scheduler. Then, we derive an
                 approximation algorithm to identify processors to be
                 merged together for further improvements in
                 energy-efficiency. The effectiveness of the proposed
                 approach is evaluated both theoretically via
                 approximation ratio bounds and also experimentally
                 through simulation study. Experimental results on
                 randomly generated workloads show that our algorithms
                 achieve an energy saving of 60\% to 68\% compared to
                 existing DAG task schedulers.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "84",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wei:2018:SAE,
  author =       "Yi-Hung Wei and Quan Leng and Wei-Ju Chen and Aloysius
                 K. Mok and Song Han",
  title =        "Schedule Adaptation for Ensuring Reliability in
                 {RT-WiFi}-Based Networked Embedded Systems",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "85:1--85:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3236011",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3236011",
  abstract =     "With the ever-growing interests in applying wireless
                 technologies for networked embedded systems to serve as
                 the communication fabric, many real-time wireless
                 technologies have been recently developed to support
                 time-critical sensing and control applications. We
                 proposed in previous work the RT-WiFi protocol that
                 provides real-time high-speed predictable data delivery
                 and enables designs to meet time-critical industrial
                 needs. However, without explicit reliability
                 enforcement mechanisms, our previous RT-WiFi design is
                 either subject to uncontrolled packet loss due to noise
                 and other interferences or may suffer from inefficient
                 communication channel usage. In this article, we
                 explicitly consider interference from both Wi-Fi and
                 non-Wi-Fi based interference sources and propose two
                 sets of effective solutions for reliable data
                 transmissions in RT-WiFi-based networked embedded
                 systems. To improve reliability against general
                 non-Wi-Fi based interference, based on rate adaptation
                 and retransmission techniques, we present an optimal
                 real-time rate adaption algorithm together with a
                 communication link scheduler that has low network
                 management overhead. A novel technique called
                 overbooking is introduced to further improve the
                 schedulability of the communication link scheduler
                 while maintaining the required communication
                 reliability. For Wi-Fi-based interference, we present
                 mechanisms that utilize virtual carrier sensing to
                 provide reliable data transmission while co-existing
                 with regular Wi-Fi networks. We have implemented the
                 proposed algorithms in the RT-WiFi network management
                 framework and demonstrated the system performance with
                 a series of experiments.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "85",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sotiriou-Xanthopoulos:2018:OBV,
  author =       "Efstathios Sotiriou-Xanthopoulos and Leonard Masing
                 and Sotirios Xydis and Kostas Siozios and J{\"u}rgen
                 Becker and Dimitrios Soudris",
  title =        "{OpenCL}-based Virtual Prototyping and Simulation of
                 Many-Accelerator Architectures",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "86:1--86:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3242179",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3242179",
  abstract =     "Heterogeneous architectures featuring multiple
                 hardware accelerators have been proposed as a promising
                 solution for meeting the ever-increasing performance
                 and power requirements of embedded systems. However,
                 the existence of numerous design parameters may result
                 in different architectural schemes and thus in extra
                 design effort. To address this issue, OpenCL-based
                 frameworks have been recently utilized for FPGA
                 programming, to enable the portability of a source code
                 to multiple architectures. However, such OpenCL
                 frameworks focus on RTL design, thus not enabling rapid
                 prototyping and abstracted modeling of complex systems.
                 Virtual Prototyping aims to overcome this problem by
                 enabling the system modeling in higher abstraction
                 levels. This article combines the benefits of OpenCL
                 and Virtual Prototyping, by proposing an OpenCL-based
                 prototyping framework for data-parallel
                 many-accelerator systems, which (a) creates a SystemC
                 Virtual Platform from OpenCL, (b) provides a
                 co-simulation environment for the host and the Virtual
                 Platform, (c) offers memory and interconnection models
                 for parallel data processing, and (d) enables the
                 system evaluation with alternative real number
                 representations (e.g., fixed-point or 16-bit
                 floating-point).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "86",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sababha:2018:RBF,
  author =       "Belal H. Sababha and Yazan A. Alqudah",
  title =        "A Reconfiguration-Based Fault-Tolerant Anti-Lock
                 Brake-by-Wire System",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "87:1--87:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3242178",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3242178",
  abstract =     "Anti-Lock Braking Systems (ABS) and Brake-by-Wire
                 Systems (BBW) are safety-critical applications by
                 nature. Such systems are required to demonstrate high
                 degrees of dependability. Fault-tolerance is the
                 primary means to achieve dependability at runtime and
                 has been an active research area for decades.
                 Fault-tolerance is usually achieved in traditional
                 embedded computing systems through redundancy and
                 voting methods. In such systems, hardware units,
                 actuators, sensors, and communication networks are
                 replicated where special voters vote against faulty
                 units. In addition to traditional hardware and software
                 redundancy, hybrid and reconfiguration-based approaches
                 to fault-tolerance are evolving. In this article, we
                 present a reconfiguration-based fault-tolerant approach
                 to achieve high dependability in ABS BBW braking
                 systems. The proposed architecture makes use of other
                 components of less safety-critical systems to maintain
                 high dependability in the more safety-critical systems.
                 This is achieved by migrating safety-critical software
                 tasks from embedded computer hardware that runs into a
                 malfunction to other embedded computing hardware
                 running less-critical software tasks. Or by using a
                 different configuration in terms of the used speed
                 sensors and type of ABS. The proposed architecture is
                 on average 20\% more reliable than conventional ABS
                 architectures assuming equal reliabilities of different
                 components.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "87",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jin:2018:PAR,
  author =       "Xi Jin and Nan Guan and Changqing Xia and Jintao Wang
                 and Peng Zeng",
  title =        "Packet Aggregation Real-Time Scheduling for
                 Large-Scale {WIA-PA} Industrial Wireless Sensor
                 Networks",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "88:1--88:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3266228",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3266228",
  abstract =     "The IEC standard WIA-PA is a communication protocol
                 for industrial wireless sensor networks. Its special
                 features, including a hierarchical topology, hybrid
                 centralized-distributed management and packet
                 aggregation make it suitable for large-scale industrial
                 wireless sensor networks. Industrial systems place
                 large real-time requirements on wireless sensor
                 networks. However, the WIA-PA standard does not specify
                 the transmission methods, which are vital to the
                 real-time performance of wireless networks, and little
                 work has been done to address this problem. In this
                 article, we propose a real-time aggregation scheduling
                 method for WIA-PA networks. First, to satisfy the
                 real-time constraints on dataflows, we propose a method
                 that combines the real-time theory with the classical
                 bin-packing method to aggregate original packets into
                 the minimum number of aggregated packets. The
                 simulation results indicate that our method outperforms
                 the traditional bin-packing method, aggregating up to
                 35\% fewer packets, and improves the real-time
                 performance by up to 10\%. Second, to make it possible
                 to solve the scheduling problem of WIA-PA networks
                 using the classical scheduling algorithms, we transform
                 the ragged time slots of WIA-PA networks to a universal
                 model. In the simulation, a large number of WIA-PA
                 networks are randomly generated to evaluate the
                 performances of several real-time scheduling
                 algorithms. By comparing the results, we obtain that
                 the earliest deadline first real-time scheduling
                 algorithm is the preferred method for WIA-PA
                 networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "88",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Weichslgartner:2018:DTR,
  author =       "Andreas Weichslgartner and Stefan Wildermann and
                 Deepak Gangadharan and Michael Gla{\ss} and J{\"u}rgen
                 Teich",
  title =        "A Design-Time\slash Run-Time Application Mapping
                 Methodology for Predictable Execution Time in
                 {MPSoCs}",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "89:1--89:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3274665",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3274665",
  abstract =     "Executing multiple applications on a single MPSoC
                 brings the major challenge of satisfying multiple
                 quality requirements regarding real-time, energy, and
                 so on. Hybrid application mapping denotes the
                 combination of design-time analysis with run-time
                 application mapping. In this article, we present such a
                 methodology, which comprises a design space exploration
                 coupled with a formal performance analysis. This
                 results in several resource reservation configurations,
                 optimized for multiple objectives, with verified
                 real-time guarantees for each individual application.
                 The Pareto-optimal configurations are handed over to
                 run-time management, which searches for a suitable
                 mapping according to this information. To provide any
                 real-time guarantees, the performance analysis needs to
                 be composable and the influence of the applications on
                 each other has to be bounded. We achieve this either by
                 spatial or a novel temporal isolation for tasks and by
                 exploiting composable networks-on-chip (NoCs). With the
                 proposed temporal isolation, tasks of different
                 applications can be mapped to the same resource, while,
                 with spatial isolation, one computing resource can be
                 exclusively used by only one application. The
                 experiments reveal that the success rate in finding
                 feasible application mappings can be increased by the
                 proposed temporal isolation by up to 30\% and energy
                 consumption can be reduced compared to spatial
                 isolation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "89",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hassan:2018:EID,
  author =       "Mohamed Hassan and Anirudh M. Kaushik and Hiren
                 Patel",
  title =        "Exposing Implementation Details of Embedded {DRAM}
                 Memory Controllers through Latency-based Analysis",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "90:1--90:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3274281",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3274281",
  abstract =     "We explore techniques to reverse-engineer DRAM
                 embedded memory controllers (MCs), including page
                 policies, address mapping, and command arbitration.
                 There are several benefits to knowing this information:
                 They allow tightening worst-case bounds of embedded
                 systems and platform-aware optimizations at the
                 operating system, source-code, and compiler levels. We
                 develop a latency-based analysis, which we use to
                 devise algorithms and C programs to extract MC
                 properties. We show the effectiveness of the proposed
                 approach by reverse-engineering the MC details in the
                 XUPV5-LX110T Xilinx platform. Furthermore, to cover a
                 breadth of policies, we use a simulation framework and
                 document our findings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "90",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2019:EES,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Embedded Security Challenge: Cyber Security
                 Contests in the Embedded Computing Domain",
  journal =      j-TECS,
  volume =       "17",
  number =       "6",
  pages =        "91:1--91:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3293502",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "91",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sun:2019:DOS,
  author =       "Hui Sun and Jianzhong Huang and Xiao Qin and
                 Changsheng Xie",
  title =        "{DLSpace}: Optimizing {SSD} Lifetime via An Efficient
                 Distributed Log Space Allocation Strategy",
  journal =      j-TECS,
  volume =       "17",
  number =       "6",
  pages =        "92:1--92:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3284749",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3284749",
  abstract =     "Due to limited numbers of program/erase cycles (i.e.,
                 P/Es) of NAND Flash, excessive out-of-place update and
                 erase-before-write operations wear out these P/Es
                 during garbage collections, which adversely shorten
                 solid state disk (i.e., SSD) lifetime. The log space in
                 NAND Flash space of an SSD performs as an updated
                 page's buffer, which lowers garbage-collection frequency
                 while reducing consumption of P/Es to extend SSD
                 lifetime. In this article, we propose DLSpace, a novel
                 distributed log space allocation strategy named
                 distributed log space, which divides log space into
                 block-level log space and page-level log space to
                 significantly optimize SSD lifetime. DLSpace's log page
                 space is dedicated to data pages in a data block. Such
                 log page space only buffers page-update operations in
                 this data block; thereby the use of log blocks for
                 postponing garbage collection delays. DLSpace is
                 conducive to fully utilizing pages in data and log
                 blocks to avoid erasures of blocks with free pages.
                 Consequently, DLSpace decreases write amplification by
                 reducing excessive valid page-rewrite and block-erase
                 operations under random-write-intensive workloads. We
                 carried out quantitative research on the extension of
                 SSD lifetime by virtue of three metrics (i.e., write
                 amplification, the number of block-erase operations,
                 and the delay time before the first garbage collection
                 occurring). Experimental results reveal that compared
                 with the existing traditional allocation strategy for
                 log space (i.e., TLSpace), DLSpace reduces write
                 amplification and the number of erase operations by up
                 to 55.2\% and 64.1\% to the most extent, respectively.
                 DLSpace also extends TLSpace's delay time of garbage
                 collections by 73.3\% to optimize SSD lifetime.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "92",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Balsamo:2019:MPN,
  author =       "Domenico Balsamo and Benjamin J. Fletcher and Alex S.
                 Weddell and Giorgos Karatziolas and Bashir M.
                 Al-Hashimi and Geoff V. Merrett",
  title =        "Momentum: Power-neutral Performance Scaling with
                 Intrinsic {MPPT} for Energy Harvesting Computing
                 Systems",
  journal =      j-TECS,
  volume =       "17",
  number =       "6",
  pages =        "93:1--93:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3281300",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Recent research has looked to supplement or even
                 replace the batteries in embedded computing systems
                 with energy harvesting, where energy is derived from
                 the device's environment. However, such supplies are
                 generally unpredictable and highly variable, and hence
                 systems typically incorporate large external energy
                 buffers (e.g., supercapacitors) to sustain computation;
                 however, these pose environmental issues and increase
                 system size and cost. This article proposes Momentum, a
                 general power-neutral methodology, with intrinsic
                 system-wide maximum power point tracking, that can be
                 applied to a wide range of different computing systems,
                 where the system dynamically scales its performance
                 (and hence power consumption) to optimize computational
                 progress depending on the power availability. Momentum
                 enables the system to operate around an efficient
                 operating voltage, maximizing forward application
                 execution, without adding any external tracking or
                 control units. This methodology combines at runtime (1)
                 a hierarchical control strategy that utilizes available
                 power management controls (such as dynamic voltage and
                 frequency scaling, and core hot-plugging) to achieve
                 efficient power-neutral operation; (2) a software-based
                 maximum power point tracking scheme (unlike existing
                 approaches, this does not require any additional
                 hardware), which adapts the system power consumption so
                 that it can work at the optimal operating voltage,
                 considering the efficiency of the entire system rather
                 than just the energy harvester; and (3) experimental
                 validation on two different scales of computing system:
                 a low power microcontroller (operating from the
                 already-present 4.7 $ \mu $F decoupling capacitance)
                 and a multi-processor system-on-chip (operating from
                 15.4 mF added capacitance). Experimental results from
                 both a controlled supply and energy harvesting source
                 show that Momentum operates correctly on both platforms
                 and exhibits improvements in forward application
                 execution of up to 11\% when compared to existing
                 power-neutral approaches and 46\% compared to existing
                 static approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "93",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sheikh:2019:EEM,
  author =       "Saad Zia Sheikh and Muhammad Adeel Pasha",
  title =        "Energy-Efficient Multicore Scheduling for Hard
                 Real-Time Systems: a Survey",
  journal =      j-TECS,
  volume =       "17",
  number =       "6",
  pages =        "94:1--94:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3291387",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As real-time embedded systems are evolving in scale
                 and complexity, the demand for a higher performance at
                 a minimum energy consumption has become a necessity.
                 Consequently, many embedded systems are now adopting
                 multicore architectures into their design. However,
                 scheduling on multicores is not a trivial task and
                 scheduling to minimize the energy consumption further
                 increases the complexity of the problem. This problem
                 is especially aggravated for hard real-time systems
                 where failure to meet a deadline can be catastrophic.
                 Such scheduling algorithms yearn for a polynomial time
                 complexity for the task-to-core assignment problem with
                 an objective to minimize the overall energy
                 consumption. There is now a trend toward heterogeneous
                 multicores where cores differ in power, performance,
                 and architectural capabilities. The desired performance
                 and energy consumption is attained by assigning a task
                 to the core that is best suited for it. In this
                 article, we present a survey on energy-efficient
                 multicore scheduling algorithms for hard real-time
                 systems. We summarize various algorithms reported in
                 the literature and classify them based on Partitioned,
                 Semi-Partitioned, and Global scheduling techniques for
                 both homogeneous and heterogeneous multicores. We also
                 present a detailed discussion on various open issues
                 within this domain.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "94",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Xie:2019:EWA,
  author = {Xie, Guoqi and Zeng, Gang and Kurachi, Ryo and
    Takada, Hiroaki and Li, Renfa and Li, Keqin},
  title = {Exact {WCRT} Analysis for Message-Processing Tasks on
    Gateway-Integrated In-Vehicle {CAN} Clusters},
  journal = j-TECS,
  volume = {17},
  number = {6},
  pages = {95:1--95:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3284178},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {A typical automotive integrated architecture is a
    controller area network (CAN) cluster integrated by a
    central gateway. This study proposes a novel and exact
    worst-case response time (WCRT) analysis method for
    message-processing tasks in the gateway. We first
    propose a round search method to obtain lower bound on
    response time (LBRT) and upper bound on response time
    (UBRT), respectively. We then obtain the exact WCRT
    belonging to the scope of the LBRT and UBRT with an
    effective non-exhaustive exploration. Experimental
    results on a real CAN message set reveal that the
    proposed exact analysis method can reduce 99.99999\%
    combinations on large-scale CAN clusters.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {95},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Feng:2019:EUH,
  author = {Feng, Zhiwei and Guan, Nan and Lv, Mingsong and
    Liu, Weichen and Deng, Qingxu and Liu, Xue and Yi, Wang},
  title = {An Efficient {UAV} Hijacking Detection Method Using
    Onboard Inertial Measurement Unit},
  journal = j-TECS,
  volume = {17},
  number = {6},
  pages = {96:1--96:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3289390},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3289390},
  abstract = {With the fast growth of civil drones, their security
    problems meet significant challenges. A commercial
    drone may be hijacked by a GPS-spoofing attack for
    illegal activities, such as terrorist attacks. The
    target of this article is to develop a technique that
    only uses onboard gyroscopes to determine whether a
    drone has been hijacked. Ideally, GPS data and the
    angular velocities measured by gyroscopes can be used
    to estimate the acceleration of a drone, which can be
    further compared with the measurement of the
    accelerometer to detect whether a drone has been
    hijacked. However, the detection results may not always
    be accurate due to some calculation and measurement
    errors, especially when no hijacking occurs in curve
    trajectory situations. To overcome this, in this
    article, we propose a novel and simple method to detect
    hijacking only based on gyroscopes' measurements and
    GPS data, without using any accelerometer in the
    detection procedure. The computational complexity of
    our method is very low, which is suitable to be
    implemented in the drones with micro-controllers. On
    the other hand, the proposed method does not rely on
    any accelerometer to detect attacks, which means it
    receives less information in the detection procedure
    and may reduce the results accuracy in some special
    situations. While the previous method can compensate
    for this flaw, the high detection results also can be
    guaranteed by using the above two methods. Experiments
    with a quad-rotor drone are conducted to show the
    effectiveness of the proposed method and the
    combination method.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {96},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Yan:2019:CAR,
  author = {Yan, Yin and Gokul, Girish and Dantu, Karthik and
    Ko, Steven Y. and Ziarek, Lukasz and Vitek, Jan},
  title = {Can {Android} Run on Time? {Extending} and Measuring
    the {Android} Platform's Timeliness},
  journal = j-TECS,
  volume = {17},
  number = {6},
  pages = {97:1--97:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3289257},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Time predictability is difficult to achieve in the
    complex, layered execution environments that are common
    in modern embedded devices such as smartphones. We
    explore adopting the Android programming model for a
    range of embedded applications that extends beyond
    mobile devices, under the constraint that changes to
    widely used libraries should be minimized. The
    challenges we explore include the interplay between
    real-time activities and the rest of the system, how to
    express the timeliness requirements of components, and
    how well those requirements can be met on stock
    embedded platforms. We detail the design and
    implementation of our modifications to the Android
    framework along with a real-time VM and OS, and we
    provide experimental data validating feasibility over
    five applications.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {97},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Abkenar:2019:GRU,
  author = {Abkenar, Amin B. and Loke, Seng W. and Zaslavsky, Arkady
    and Rahayu, Wenny},
  title = {{GroupSense}: Recognizing and Understanding Group
    Physical Activities using Multi-Device Embedded
    Sensing},
  journal = j-TECS,
  volume = {17},
  number = {6},
  pages = {98:1--98:??},
  month = jan,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3295747},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Human activity recognition using embedded mobile and
    embedded sensors is becoming increasingly important.
    Scaling up from individuals to groups, that is, Group
    Activity Recognition (GAR), has attracted significant
    attention recently. This article proposes a model and
    modeling language for GAR called GroupSense-L and a
    novel distributed middleware called GroupSense for
    mobile GAR. We implemented and tested GroupSense using
    smartphone sensors, smartwatch sensors, and embedded
    sensors in things, where we have a protocol for these
    different devices to exchange information required for
    GAR. A range of continuous group activities (from
    simple to fairly complex) illustrates our approach and
    demonstrates the feasibility of our model and richness
    of the proposed specialization. We then conclude with
    lessons learned for GAR and future work.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {98},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Derler:2019:GES,
  author = {Derler, Patricia and Schneider, Klaus and
    Talpin, Jean-Pierre},
  title = {Guest Editorial: Special Issue of {ACM TECS on the
    ACM--IEEE International Conference on Formal Methods
    and Models for System Design (MEMOCODE 2017)}},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {1:1--1:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3292422},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3292422},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {1},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Shukla:2019:EHF,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Human Factors in Embedded Computing",
  journal =      j-TECS,
  volume =       "18",
  number =       "1",
  pages =        "1e:1--1e:??",
  month =        feb,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3302888",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:42 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3302888",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nuzzo:2019:SAG,
  author = {Nuzzo, Pierluigi and Li, Jiwei and
    Sangiovanni-Vincentelli, Alberto L. and Xi, Yugeng and Li, Dewei},
  title = {Stochastic Assume--Guarantee Contracts for
    Cyber-Physical System Design},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {2:1--2:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3243216},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3243216},
  abstract = {We present an assume-guarantee contract framework for
    cyber-physical system design under probabilistic
    requirements. Given a stochastic linear system and a
    set of requirements captured by bounded Stochastic
    Signal Temporal Logic (StSTL) contracts, we propose
    algorithms to check contract compatibility,
    consistency, and refinement, and generate a sequence of
    control inputs that satisfies a contract. We leverage
    encodings of the verification and control synthesis
    tasks into mixed integer optimization problems, and
    conservative approximations of probabilistic
    constraints that produce sound and tractable problem
    formulations. We illustrate the effectiveness of our
    approach on three case studies, including the design of
    controllers for aircraft power distribution networks.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {2},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Plassan:2019:MMA,
  author = {Plassan, Guillaume and Morin-Allory, Katell and
    Borrione, Dominique},
  title = {Mining Missing Assumptions from Counter-Examples},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {3:1--3:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3288759},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3288759},
  abstract = {During the formal functional verification of
    Register-Transfer Level designs, a false failure is
    often observed. Most of the time, this failure is
    caused by an underconstrained model. The analysis of
    the root cause for the verification error and the
    creation of missing assumptions are a significant time
    burden. In this article, we present a methodology to
    automatically mine these missing assumptions from
    counter-examples. First, multiple counter-examples are
    generated for the same property. Then, relevant
    behaviors are mined from the counter-examples. Finally,
    corresponding assumptions are filtered and a small
    amount is returned to the user for review.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {3},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Fellner:2019:MBM,
  author = {Fellner, Andreas and Krenn, Willibald and Schlick, Rupert
    and Tarrach, Thorsten and Weissenbacher, Georg},
  title = {Model-based, Mutation-driven Test-case Generation Via
    Heuristic-guided Branching Search},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {4:1--4:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3289256},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3289256},
  abstract = {This work introduces a heuristic-guided branching
    search algorithm for model-based, mutation-driven
    test-case generation. The algorithm is designed towards
    the efficient and computationally tractable exploration
    of discrete, non-deterministic models with huge state
    spaces. Asynchronous parallel processing is a key
    feature of the algorithm. The algorithm is inspired by
    the successful path planning algorithm Rapidly
    exploring Random Trees (RRT). We adapt RRT in several
    aspects towards test-case generation. Most notably, we
    introduce parametrized heuristics for start and
    successor state selection, as well as a mechanism to
    construct test cases from the data produced during the
    search. We implemented our algorithm in the existing
    test-case generation framework MoMuT. We present an
    extensive evaluation of the proposed heuristics and
    parameters of the algorithm, based on a diverse set of
    demanding models obtained in an industrial context. In
    total, we continuously utilized 128 CPU cores on three
    servers for several weeks to gather the experimental
    data presented. We show that branching search works
    well and the use of multiple heuristics is justified.
    With our new algorithm, we are now able to process
    models consisting of over 2,300 concurrent objects. To
    our knowledge, there is no other mutation-driven
    test-case generation tool that is able to process
    models of this magnitude.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {4},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Edwards:2019:CDC,
  author = {Edwards, Stephen A. and Townsend, Richard and
    Barker, Martha and Kim, Martha A.},
  title = {Compositional Dataflow Circuits},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {5:1--5:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3274280},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3274280},
  abstract = {We present a technique for implementing dataflow
    networks as compositional hardware circuits. We first
    define an abstract dataflow model with unbounded
    buffers that supports data-dependent blocks (mux,
    demux, and nondeterministic merge); we then show how to
    faithfully implement such networks with bounded buffers
    and handshaking. Handshaking admits compositionality:
    our circuits can be connected with or without buffers,
    and combinational cycles arise only from a completely
    unbuffered cycle. While bounding buffer sizes can cause
    the system to deadlock prematurely, the system is
    guaranteed to produce the same, correct, data before
    then. Thus, unless the system deadlocks, inserting or
    removing buffers only affects its performance. We
    demonstrate how this enables design space to be
    explored.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {5},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Reynolds:2019:MME,
  author = {Reynolds, Thomas N. and Procter, Adam and
    Harrison, William L. and Allwein, Gerard},
  title = {The Mechanized Marriage of Effects and Monads with
    Applications to High-assurance Hardware},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {6:1--6:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3274282},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3274282},
  abstract = {Constructing high-assurance, secure hardware remains a
    challenge, because to do so relies on both a verifiable
    means of hardware description and implementation.
    However, production hardware description languages
    (HDL) lack the formal underpinnings required by formal
    methods in security. Still, there is no such thing as
    high-assurance systems without high-assurance hardware.
    We present a core calculus of secure hardware
    description with its formal semantics, security type
    system, and mechanization in Coq. This calculus is the
    core of the functional HDL, ReWire, shown in previous
    work to have useful applications in reconfigurable
    computing. This work supports a full-fledged, formal
    methodology for producing high-assurance hardware.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {6},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Chattopadhyay:2019:QIL,
  author = {Chattopadhyay, Sudipta and Beck, Moritz and Rezine, Ahmed
    and Zeller, Andreas},
  title = {Quantifying the Information Leakage in Cache Attacks
    via Symbolic Execution},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {7:1--7:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3288758},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3288758},
  abstract = {Cache attacks allow attackers to infer the properties
    of a secret execution by observing cache hits and
    misses. But how much information can actually leak
    through such attacks? For a given program, a cache
    model, and an input, our CHALICE framework leverages
    symbolic execution to compute the amount of information
    that can possibly leak through cache attacks. At the
    core of CHALICE is a novel approach to quantify
    information leakage that can highlight critical cache
    side-channel leakage on arbitrary binary code. In our
    evaluation on real-world programs from OpenSSL and
    Linux GDK libraries, CHALICE effectively quantifies
    information leakage: For an AES-128 implementation on
    Linux, for instance, CHALICE finds that a cache attack
    can leak as much as 127 out of 128 bits of the
    encryption key.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {7},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Park:2019:ERR,
  author = {Park, Taeju and Shin, Kang G.},
  title = {{EACAN}: Reliable and Resource-Efficient {CAN}
    Communications},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {8:1--8:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3301309},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3301309},
  abstract = {Worst-case-based timing verification for the
    controller area network (CAN) has been the bottleneck
    to efficient use of its bandwidth. Especially, this
    inefficiency comes from the worst-case transmission
    error rate (WCTER) when transmission errors are
    accounted for. To alleviate this inefficiency, we
    propose a runtime adaptation scheme, error-adaptive CAN
    (EACAN). EACAN observes the behavior of transmission
    errors at runtime, and reconfigures the message period
    based on the observation to meet the timing-failure
    requirement. We experimentally evaluate the bandwidth
    utilization of both EACAN- and WCTER-based
    verification, showing that the former improves the
    bandwidth utilization by 14\% over the latter.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {8},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Pederson:2019:BCL,
  author = {Pederson, Daniel J. and Quinkert, Christopher J. and
    Arafat, Muhammad A. and Somann, Jesse P. and
    Williams, Jack D. and Bercich, Rebecca A. and Wang, Zhi and
    Albors, Gabriel O. and Jefferys, John G. R. and
    Irazoqui, Pedro P.},
  title = {The {Bionode}: a Closed-Loop Neuromodulation Implant},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {9:1--9:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3301310},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3301310},
  abstract = {Implantable closed-loop neuromodulation devices for
    use in long-term chronic studies in a lab or clinical
    trial are expensive to acquire and difficult to modify
    for specific use cases. This article documents the
    design and fabrication of a wireless implantable device
    using only commercially available off-the-shelf (COTS)
    components. This device, called the Bionode, can record
    and transmit up to four channels of biopotential data
    while simultaneously providing biphasic
    constant-current stimulation. The Bionode is a viable,
    low-cost, reusable, and easily modifiable research tool
    with clinical implications that has gained widespread
    use in various research projects at Purdue
    University.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {9},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Venkataramani:2019:SMM,
  author = {Venkataramani, Vanchinathan and Chan, Mun Choon and
    Mitra, Tulika},
  title = {Scratchpad-Memory Management for Multi-Threaded
    Applications on Many-Core Architectures},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {10:1--10:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3301308},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3301308},
  abstract = {Contemporary many-core architectures, such as Adapteva
    Epiphany and Sunway TaihuLight, employ per-core
    software-controlled Scratchpad Memory (SPM) rather than
    caches for better performance-per-watt and
    predictability. In these architectures, a core is
    allowed to access its own SPM as well as remote SPMs
    through the Network-On-Chip (NoC). However, the
    compiler/programmer is required to explicitly manage
    the movement of data between SPMs and off-chip memory.
    Utilizing SPMs for multi-threaded applications is even
    more challenging, as the shared variables across the
    threads need to be placed appropriately. Accessing
    variables from remote SPMs with higher access latency
    further complicates this problem as certain links in
    the NoC may be heavily contended by multiple threads.
    Therefore, certain variables may need to be replicated
    in multiple SPMs to reduce the contention delay and/or
    the overall access time. We present Coordinated Data
    Management (CDM), a compile-time framework that
    automatically identifies shared/private variables and
    places them with replication (if necessary) to suitable
    on-chip or off-chip memory, taking NoC contention into
    consideration. We develop both an exact Integer Linear
    Programming (ILP) formulation as well as an iterative,
    scalable algorithm for placing the data variables in
    multi-threaded applications on many-core SPMs.
    Experimental evaluation on the Parallella hardware
    platform confirms that our allocation strategy reduces
    the overall execution time and energy consumption by $
    1.84 \times $ and $ 1.83 \times $, respectively, when
    compared to the existing approaches.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {10},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Rhisheekesan:2019:CFC,
  author = {Rhisheekesan, Abhishek and Jeyapaul, Reiley and
    Shrivastava, Aviral},
  title = {Control Flow Checking or Not? (for Soft Errors)},
  journal = j-TECS,
  volume = {18},
  number = {1},
  pages = {11:1--11:??},
  month = feb,
  year = {2019},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3301311},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Oct 17 18:16:42 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3301311},
  abstract = {Huge leaps in performance and power improvements of
    computing systems are driven by rapid technology
    scaling, but technology scaling has also rendered
    computing systems susceptible to soft errors. Among the
    soft error protection techniques, Control Flow Checking
    (CFC) based techniques have gained a reputation of
    being lightweight yet effective. The main idea behind
    CFCs is to check if the program is executing the
    instructions in the right order. In order to validate
    the protection claims of existing CFCs, we develop a
    systematic and quantitative method to evaluate the
    protection achieved by CFCs using the metric of
    vulnerability. Our quantitative analysis indicates that
    existing CFC techniques are not only ineffective in
    providing protection from soft faults, but incur
    additional performance and power overheads. Our results
    show that software-only CFC protection schemes increase
    system vulnerability by 18\%--21\% with 17\%--38\%
    performance overhead and hybrid CFC protection
    increases vulnerability by 5\%. Although the
    vulnerability remains almost the same for hardware-only
    CFC protection, they incur overheads of design cost,
    area, and power due to the hardware modifications
    required for their implementations.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  articleno = {11},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {https://dl.acm.org/loi/tecs},
}

@Article{Roy:2019:CPR,
  author          = {Debapriya Basu Roy and Shivam Bhasin and
    Ivica Nikoli{\'c} and Debdeep Mukhopadhyay},
  title           = {Combining {PUF} with {RLUTs}: a Two-party Pay-per-device
    {IP} Licensing Scheme on {FPGAs}},
  journal         = j-TECS,
  volume          = {18},
  number          = {2},
  pages           = {12:1--12:??},
  month           = apr,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3301307},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:43 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3301307},
  abstract        = {With the popularity of modern FPGAs, the business of
    FPGA specific intellectual properties (IP) is expanding rapidly. This
    also brings in the concern of IP protection. FPGA vendors are making
    serious efforts toward IP protection, leading to standardization schemes
    like IEEE P1735. However, efficient techniques to prevent unauthorized
    overuse of IP still remain an open question. In this article, we propose
    a two-party IP protection scheme combining the re-configurable look-up
    table primitive of modern FPGAs with physically unclonable functions
    (PUF). The proposed scheme works with the assumption that the FPGA
    vendor provides the assurance of confidentiality and integrity of the
    developed IP. The proposed scheme is considerably lightweight compared
    to existing schemes, prevents overuse, and does not involve FPGA vendors
    or trusted third parties for IP licensing. The validation of the
    proposed scheme is done on MCNC'91 benchmark and third-party IPs like
    AES and lightweight MIPS processors.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {12},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

@Article{Zhong:2019:SHS,
  author          = {Guanwen Zhong and Akshat Dubey and Cheng Tan and
    Tulika Mitra},
  title           = {{Synergy}: an {HW\slash SW} Framework for High Throughput
    {CNNs} on Embedded Heterogeneous {SoC}},
  journal         = j-TECS,
  volume          = {18},
  number          = {2},
  pages           = {13:1--13:??},
  month           = apr,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3301278},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:43 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3301278},
  abstract        = {Convolutional Neural Networks (CNN) have been widely
    deployed in diverse application domains. There has been significant
    progress in accelerating both their training and inference using
    high-performance GPUs, FPGAs, and custom ASICs for datacenter-scale
    environments. The recent proliferation of mobile and Internet of Things
    (IoT) devices have necessitated real-time, energy-efficient deep neural
    network inference on embedded-class, resource-constrained platforms. In
    this context, we present Synergy, an automated, hardware-software
    co-designed, pipelined, high-throughput CNN inference framework on
    embedded heterogeneous system-on-chip (SoC) architectures (Xilinx Zynq).
    Synergy leverages, through multi-threading, all the available on-chip
    resources, which includes the dual-core ARM processor along with the
    FPGA and the NEON Single-Instruction Multiple-Data (SIMD) engines as
    accelerators. Moreover, Synergy provides a unified abstraction of the
    heterogeneous accelerators (FPGA and NEON) and can adapt to different
    network configurations at runtime without changing the underlying
    hardware accelerator architecture by balancing workload across
    accelerators through work-stealing. Synergy achieves 7.3X speedup,
    averaged across seven CNN models, over a well-optimized software-only
    solution. Synergy demonstrates substantially better throughput and
    energy-efficiency compared to the contemporary CNN implementations on
    the same SoC architecture.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {13},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

@Article{Guha:2019:SBS,
  author          = {Krishnendu Guha and Debasri Saha and Amlan Chakrabarti},
  title           = {Stigmergy-Based Security for {SoC} Operations From Runtime
    Performance Degradation of {SoC} Components},
  journal         = j-TECS,
  volume          = {18},
  number          = {2},
  pages           = {14:1--14:??},
  month           = apr,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3301279},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:43 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3301279},
  abstract        = {The semiconductor design industry of the embedded era
    has embraced the globalization strategy for system on chip (SoC) design.
    This involves incorporation of various SoC components or intellectual
    properties (IPs), procured from various third-party IP (3PIP) vendors.
    However, trust of an SoC is challenged when a supplied IP is counterfeit
    or implanted with a Hardware Trojan Horse. Both roots of untrust may
    result in sudden performance degradation at runtime. None of the
    existing hardware security approaches organize the behavior of the IPs
    at the low level, to ensure timely completion of SoC operations.
    However, real-time SoC operations are always associated with a deadline,
    and a deadline miss due to sudden performance degradation of any of the
    IPs may jeopardize mission-critical applications. We seek refuge to the
    stigmergic behavior exhibited in insect colonies to propose a
    decentralized self-aware security approach. The self-aware security
    modules attached with each IP works based on the Observe-Decide-Act
    paradigm and not only detects vulnerability but also organizes behavior
    of the IPs dynamically at runtime so that the high-level objective of
    task completion before a deadline is ensured. Experimental validation
    and low overhead of our proposed security modules over various benchmark
    IPs and crypto SoCs depict the prospects of our proposed mechanism.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {14},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

@Article{Ahmed:2019:CRU,
  author          = {Alif Ahmed and Yuanwen Huang and Prabhat Mishra},
  title           = {Cache Reconfiguration Using Machine Learning for
    Vulnerability-aware Energy Optimization},
  journal         = j-TECS,
  volume          = {18},
  number          = {2},
  pages           = {15:1--15:??},
  month           = apr,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3309762},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:43 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3309762},
  abstract        = {Dynamic cache reconfiguration has been widely explored
    for energy optimization and performance improvement for single-core
    systems. Cache partitioning techniques are introduced for the shared
    cache in multicore systems to alleviate inter-core interference. While
    these techniques focus only on performance and energy, they ignore
    vulnerability due to soft errors. In this article, we present a static
    profiling based algorithm to enable vulnerability-aware
    energy-optimization for real-time multicore systems. Our approach can
    efficiently search the space of cache configurations and partitioning
    schemes for energy optimization while task deadlines and vulnerability
    constraints are satisfied. A machine learning technique has been
    employed to minimize the static profiling time without sacrificing the
    accuracy of results. Our experimental results demonstrate that our
    approach can achieve 19.2\% average energy savings compared with the
    base configuration, while drastically reducing the vulnerability (49.3\%
    on average) compared to state-of-the-art techniques. Furthermore, the
    machine learning technique enabled more than 10x speedup in static
    profiling time with a negligible prediction error of 3\%.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {15},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

@Article{Lentaris:2019:SMF,
  author =       "George Lentaris and Konstantinos Maragos and Dimitrios
                 Soudris and Xenophon Zabulis and Manolis Lourakis",
  title =        "Single- and Multi-{FPGA} Acceleration of Dense Stereo
                 Vision for Planetary Rovers",
  journal =      j-TECS,
  volume =       "18",
  number =       "2",
  pages =        "16:1--16:??",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3312743",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3312743",
  abstract =     "Increased mobile autonomy is a vital requisite for
                 future planetary exploration rovers. Stereo vision is a
                 key enabling technology in this regard, as it can
                 passively reconstruct in three dimensions the
                 surroundings of a rover and facilitate the selection of
                 science targets and the planning of safe routes.
                 Nonetheless, accurate dense stereo algorithms are
                 computationally demanding. When executed on the
                 low-performance, radiation-hardened CPUs typically
                 installed on rovers, slow stereo processing severely
                 limits the driving speed and hence the science that can
                 be conducted in situ. Aiming to decrease execution time
                 while increasing the accuracy of stereo vision embedded
                 in future rovers, this article proposes HW/SW co-design
                 and acceleration on resource-constrained, space-grade
                 FPGAs. In a top-down approach, we develop a stereo
                 algorithm based on the space sweep paradigm, design its
                 parallel HW architecture, implement it with VHDL, and
                 demonstrate feasible solutions even on small-sized
                 devices with our multi-FPGA partitioning methodology.
                 To meet all cost, accuracy, and speed requirements set
                 by the European Space Agency for this system, we
                 customize our HW/SW co-processor by design space
                 exploration and testing on a Mars-like dataset.
                 Implemented on Xilinx Virtex technology, or European
                 NG-MEDIUM devices, the FPGA kernel processes a $ 1{,}120
                 \times 1{,}120 $ stereo pair in 1.7s--3.1s, utilizing
                 only 5.4--9.3 LUT6 and 200--312 RAMB18. The proposed
                 system exhibits up to $ 32 \times $ speedup over
                 desktop CPUs, or $ 2{,}810 \times $ over space-grade
                 LEON3, and achieves a mean reconstruction error less
                 than 2cm up to 4m depth. Excluding errors exceeding 2cm
                 (which are less than 4\% of the total), the mean error
                 is under 8mm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Harb:2019:FIE,
  author =       "Salah Harb and Moath Jarrah",
  title =        "{FPGA} Implementation of the {ECC} Over {$ {\rm
                 GF}(2^m) $} for Small Embedded Applications",
  journal =      j-TECS,
  volume =       "18",
  number =       "2",
  pages =        "17:1--17:??",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3310354",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3310354",
  abstract =     "In this article, we propose a compact elliptic curve
                 cryptographic core over GF($ 2^m$). The proposed
                 architecture is based on the Lopez-Dahab projective
                 point arithmetic operations. To achieve efficiency in
                 resources usage, an iterative method that uses a
                 ROM-based state machine is developed for the elliptic
                 curve cryptography (ECC) point doubling and addition
                 operations. The compact ECC core has been implemented
                 using Virtex FPGA devices. The number of the required
                 slices is 2,102 at 321MHz and 6,738 slices at 262MHz
                 for different GF($ 2^m$). Extensive experiments were
                 conducted to compare our solution to existing methods
                 in the literature. Our compact core consumes less area
                 than all previously proposed methods. It also provides
                 an excellent performance for scalar multiplication. In
                 addition, the ECC core is implemented in ASIC 0.18 $
                 \mu $m CMOS technology, and the results show excellent
                 performance. Therefore, our proposed ECC core method
                 provides a balance in terms of speed, area, and power
                 consumption. This makes the proposed design the right
                 choice for cryptosystems in limited-resource devices
                 such as cell phones, IP cores of SoCs, and smart cards.
                 Moreover, side-channel attack resistance is implemented
                 to prevent power analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Arghavani:2019:CLB,
  author =       "Abbas Arghavani and Haibo Zhang and Zhiyi Huang and
                 Yawen Chen",
  title =        "{Chimp}: a Learning-based Power-aware Communication
                 Protocol for Wireless Body Area Networks",
  journal =      j-TECS,
  volume =       "18",
  number =       "2",
  pages =        "18:1--18:??",
  month =        apr,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3309763",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3309763",
  abstract =     "Radio links in wireless body area networks (WBANs)
                 commonly experience highly time-varying attenuation due
                 to the dynamic network topology and frequent occlusions
                 caused by body movements, making it challenging to
                 design a reliable, energy-efficient, and real-time
                 communication protocol for WBANs. In this article, we
                 present Chimp, a learning-based power-aware
                 communication protocol in which each sending node can
                 self-learn the channel quality and choose the best
                 transmission power level to reduce energy consumption
                 and interference range while still guaranteeing high
                 communication reliability. Chimp is designed based on
                 learning automata that uses only the acknowledgment
                 packets and motion data from a local gyroscope sensor
                 to infer the real-time channel status. We design a new
                 cost function that takes into account the energy
                 consumption, communication reliability and interference
                 and develop a new learning function that can guarantee
                 to select the optimal transmission power level to
                 minimize the cost function for any given channel
                 quality. For highly dynamic postures such as walking
                 and running, we exploit the correlation between channel
                 quality and motion data generated by a gyroscope sensor
                 to fastly estimate channel quality, eliminating the
                 need to use expensive channel sampling procedures. We
                 evaluate the performance of Chimp through experiments
                 using TelosB motes equipped with the MPU-9250 motion
                 sensor chip and compare it with the state-of-the-art
                 protocols in different body postures. Experimental
                 results demonstrate that Chimp outperforms existing
                 schemes and works efficiently in most common body
                 postures. In high-data-rate scenarios, it achieves
                 almost the same performance as the optimal power
                 assignment scheme in which the optimal power level for
                 each transmission is calculated based on the collected
                 channel measurements in an off-line manner.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jiang:2019:BSR,
  author =       "Zhe Jiang and Neil Audsley and Pan Dong",
  title =        "{BlueIO}: a Scalable Real-Time Hardware {I/O}
                 Virtualization System for Many-core Embedded Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "19:1--19:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3309765",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3309765",
  abstract =     "In safety-critical systems, time predictability is
                 vital. This extends to I/O operations that require
                 predictability, timing-accuracy, parallel access,
                 scalability, and isolation. Currently, existing
                 approaches cannot achieve all these requirements at the
                 same time. In this article, we propose a framework of
                 hardware framework for real-time I/O
                 virtualization---termed BlueIO---to meet all these
                 requirements simultaneously. BlueIO integrates the
                 functionalities of I/O virtualization, low-layer I/O
                 drivers, and a clock cycle level timing-accurate I/O
                 controller (using the GPIOCP [36]). BlueIO provides
                 this functionality in the hardware layer, supporting
                 abstract virtualized access to I/O from the software
                 domain. The hardware implementation includes I/O
                 virtualization and I/O drivers, provides isolation and
                 parallel (concurrent) access to I/O operations, and
                 improves I/O performance. Furthermore, the approach
                 includes the previously proposed GPIOCP to guarantee
                 that I/O operations will occur at a specific clock
                 cycle (i.e., be timing-accurate and predictable). In
                 this article, we present a hardware consumption
                 analysis of BlueIO to show that it linearly scales with
                 the number of CPUs and I/O devices, which is evidenced
                 by our implementation in VLSI and FPGA. We also
                 describe the design and implementation of BlueIO and
                 demonstrate how a BlueIO-based system can be exploited
                 to meet real-time requirements with significant
                 improvements in I/O performance and a low running cost
                 on different OSs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2019:ERH,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Reflections on the History of
                 Cyber-Physical versus Embedded Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "19e:1--19e:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3325115",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3325115",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tabrizi:2019:DLC,
  author          = {Farid Molazem Tabrizi and Karthik Pattabiraman},
  title           = {Design-Level and Code-Level Security Analysis of {IoT}
    Devices},
  journal         = j-TECS,
  volume          = {18},
  number          = {3},
  pages           = {20:1--20:??},
  month           = jun,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3310353},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:43 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3310353},
  abstract        = {The Internet of Things (IoT) is playing an important
    role in different aspects of our lives. Smart grids, smart cars, and
    medical devices all incorporate IoT devices as key components. The
    ubiquity and criticality of these devices make them an attractive target
    for attackers. Therefore, we need techniques to analyze their security
    so that we can address their potential vulnerabilities. IoT devices,
    unlike remote servers, are user-facing and, therefore, an attacker may
    interact with them more extensively, e.g., via physical access. Existing
    techniques for analyzing security of IoT devices either rely on a
    pre-defined set of attacks and, therefore, have limited effect or do not
    consider the specific capabilities the attackers have against IoT
    devices. Security analysis techniques may operate at the design-level,
    leveraging abstraction to avoid state-space explosion, or at the
    code-level for ensuring accuracy. In this article, we introduce two
    techniques, one at the design-level, and the other at the code-level, to
    analyze security of IoT devices, and compare their effectiveness. The
    former technique uses model checking, while the latter uses symbolic
    execution, to find attacks based on the attacker's capabilities. We
    evaluate our techniques on an open source smart meter. We find that our
    code-level analysis technique is able to find three times more attacks
    and complete the analysis in half the time, compared to the design-level
    analysis technique, with no false positives.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {20},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

@Article{Belson:2019:SAP,
  author =       "Bruce Belson and Jason Holdsworth and Wei Xiang and
                 Bronson Philippa",
  title =        "A Survey of Asynchronous Programming Using Coroutines
                 in the {Internet of Things} and Embedded Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "21:1--21:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3319618",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3319618",
  abstract =     "Many Internet of Things and embedded projects are
                 event driven, and therefore require asynchronous and
                 concurrent programming. Current proposals for C++20
                 suggest that coroutines will have native language
                 support. It is timely to survey the current use of
                 coroutines in embedded systems development. This
                 article investigates existing research which uses or
                 describes coroutines on resource-constrained platforms.
                 The existing research is analysed with regard to:
                 software platform, hardware platform, and capacity; use
                 cases and intended benefits; and the application
                 programming interface design used for coroutines. A
                 systematic mapping study was performed, to select
                 studies published between 2007 and 2018 which contained
                 original research into the application of coroutines on
                 resource-constrained platforms. An initial set of 566
                 candidate papers, collated from on-line databases, were
                 reduced to only 35 after filters were applied,
                 revealing the following taxonomy. The C \& C++
                 programming languages were used by 22 studies out of
                 35. As regards hardware, 16 studies used 8- or 16-bit
                 processors while 13 used 32-bit processors. The four
                 most common use cases were concurrency (17 papers),
                 network communication (15), sensor readings (9), and
                 data flow (7). The leading intended benefits were code
                 style and simplicity (12 papers), scheduling (9), and
                 efficiency (8). A wide variety of techniques have been
                 used to implement coroutines, including native macros,
                 additional tool chain steps, new language features, and
                 non-portable assembly language. We conclude that there
                 is widespread demand for coroutines on
                 resource-constrained devices. Our findings suggest that
                 there is significant demand for a formalised, stable,
                 well-supported implementation of coroutines in C++,
                 designed with consideration of the special needs of
                 resource-constrained devices, and further that such an
                 implementation would bring benefits specific to such
                 devices.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Batina:2019:ISI,
  author          = {Lejla Batina and Sherman S. M. Chow and
    Gerhard Hancke and Zhe Liu},
  title           = {Introduction to the Special Issue on Cryptographic
    Engineering for {Internet of Things}: Security Foundations, Lightweight
    Solutions, and Attacks},
  journal         = j-TECS,
  volume          = {18},
  number          = {3},
  pages           = {22:1--22:??},
  month           = jun,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3322641},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Oct 17 18:16:43 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3322641},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {22},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

@Article{Zhou:2019:LIN,
  author =       "Lu Zhou and Chunhua Su and Zhi Hu and Sokjoon Lee and
                 Hwajeong Seo",
  title =        "Lightweight Implementations of {NIST P-256} and {SM2
                 ECC} on $8$-bit Resource-Constraint Embedded Device",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "23:1--23:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3236010",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3236010",
  abstract =     "Elliptic Curve Cryptography (ECC) now is one of the
                 most important approach to instantiate asymmetric
                 encryption and signature schemes, which has been
                 extensively exploited to protect the security of
                 cyber-physical systems. With the advent of the Internet
                 of Things (IoT), a great deal of constrained devices
                 may require software implementations of ECC operations.
                 Under this circumstances, the SM2, a set of public key
                 cryptographic algorithms based on elliptic curves
                 published by Chinese Commercial Cryptography
                 Administration Office, was standardized at ISO in 2017
                 to enhance the cyber-security. However, few research
                 works on the implementation of SM2 for constrained
                 devices have been conducted. In this work, we fill this
                 gap and propose our efficient, secure, and compact
                 implementation of scalar multiplication on a 256-bit
                 elliptic curve recommended by the SM2, as well as a
                 comparison implementation of scalar multiplication on
                 the same bit-length elliptic curve recommended by NIST.
                 We re-design some existent techniques to fit the
                 low-end IoT platform, namely 8-bit AVR processors, and
                 our implementations evaluated on the desired platform
                 show that the SM2 algorithms have competitive
                 efficiency and security with NIST, which would work
                 well to secure the IoT world.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Turan:2019:CFF,
  author =       "Furkan Turan and Ingrid Verbauwhede",
  title =        "Compact and Flexible {FPGA} Implementation of
                 {Ed25519} and {X25519}",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "24:1--24:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3312742",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3312742",
  abstract =     "This article describes a field-programmable gate array
                 (FPGA) cryptographic architecture, which combines the
                 elliptic curve--based Ed25519 digital signature
                 algorithm and the X25519 key establishment scheme in a
                 single module. Cryptographically, these are
                 high-security elliptic curve cryptography algorithms
                 with short key sizes and impressive execution times in
                 software. Our goal is to provide a lightweight FPGA
                 module that enables them on resource-constrained
                 devices, specifically for Internet of Things (IoT)
                 applications. In addition, we aim at extensibility with
                 customisable countermeasures against timing and
                 differential power analysis side-channel attacks and
                 fault-injection attacks. For the former, we offer a
                 choice between time-optimised versus constant-time
                 execution, with or without Z-coordinate randomisation
                 and base-point blinding; and for the latter, we offer
                 enabling or disabling default-case statements in the
                 Finite State Machine (FSM) descriptions. To obtain
                 compactness and at the same time fast execution times,
                 we make maximum use of the Digital Signal Processing
                 (DSP) slices on the FPGA. We designed a single
                 arithmetic unit that is flexible to support operations
                 with two moduli and non-modulus arithmetic. In
                 addition, our design benefits in-place memory
                 management and the local storage of inputs into DSP
                 slices' pipeline registers and takes advantage of
                 distributed memory. These eliminate a memory access
                 bottleneck. The flexibility is offered by a micro-code
                 supported instruction-set architecture. Our design
                 targets 7-Series Xilinx FPGAs and is prototyped on a
                 Zynq System-on-Chip (SoC). The base design combining
                 Ed25519 and X25519 in a single module, and its
                 implementation requires only around 11.1K Lookup Tables
                 (LUTs), 2.6K registers, and 16 DSP slices. Also, it
                 achieves performance of 1.6ms for a signature
                 generation and 3.6ms for a signature verification for a
                 1024-bit message with an 82MHz clock. Moreover, the
                 design can be optimised only for X25519, which gives
                 the most compact FPGA implementation compared to
                 previously published X25519 implementations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2019:XBL,
  author =       "Weiqiang Liu and Lei Zhang and Zhengran Zhang and
                 Chongyan Gu and Chenghua Wang and Maire O'Neill and
                 Fabrizio Lombardi",
  title =        "{XOR}-Based Low-Cost Reconfigurable {PUFs} for {IoT}
                 Security",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "25:1--25:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3274666",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3274666",
  abstract =     "With the rapid development of the Internet of Things
                 (IoT), security has attracted considerable interest.
                 Conventional security solutions that have been proposed
                 for the Internet based on classical cryptography cannot
                 be applied to IoT nodes as they are typically
                 resource-constrained. A physical unclonable function
                 (PUF) is a hardware-based security primitive and can be
                 used to generate a key online or uniquely identify an
                 integrated circuit (IC) by extracting its internal
                 random differences using so-called challenge-response
                 pairs (CRPs). It is regarded as a promising low-cost
                 solution for IoT security. A logic reconfigurable PUF
                 (RPUF) is highly efficient in terms of hardware cost.
                 This article first presents a new classification for
                 RPUFs, namely circuit-based RPUF (C-RPUF) and
                 algorithm-based RPUF (A-RPUF); two Exclusive OR
                 (XOR)-based RPUF circuits (an XOR-based reconfigurable
                 bistable ring PUF (XRBR PUF) and an XOR-based
                 reconfigurable ring oscillator PUF (XRRO PUF)) are
                 proposed. Both the XRBR and XRRO PUFs are implemented
                 on Xilinx Spartan-6 field-programmable gate arrays
                 (FPGAs). The implementation results are compared with
                 previous PUF designs and show good uniqueness and
                 reliability. Compared to conventional PUF designs, the
                 most significant advantage of the proposed designs is
                 that they are highly efficient in terms of hardware
                 cost. Moreover, the XRRO PUF is the most efficient
                 design when compared with previous RPUFs. Also, both
                 the proposed XRRO and XRBR PUFs require only 12.5\% of
                 the hardware resources of previous bistable ring PUFs
                 and reconfigurable RO PUFs, respectively, to generate a
                 1-bit response. This confirms that the proposed XRBR
                 and XRRO PUFs are very efficient designs with good
                 uniqueness and reliability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2019:ESA,
  author =       "Robert P. Lee and Konstantinos Markantonakis and Raja
                 Naeem Akram",
  title =        "Ensuring Secure Application Execution and
                 Platform-Specific Execution in Embedded Devices",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "26:1--26:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3284361",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3284361",
  abstract =     "The Internet of Things (IoT) is expanding at a large
                 rate, with devices found in commercial and domestic
                 settings from industrial sensors to home appliances.
                 However, as the IoT market grows, so does the number of
                 attacks made against it with some reports claiming an
                 increase of 600\% in 2017. This work seeks to prevent
                 code replacement, injection, and exploitation attacks
                 by ensuring correct and platform specific application
                 execution. This combines two previously studied
                 problems: secure application execution and binding
                 hardware and software. We present descriptions of both
                 problems and requirements for ensuring both
                 simultaneously. We then propose a scheme extending
                 previous work that meets these requirements, and
                 describe our implementation of the soft-core Secure
                 Execution Processor developed and tested on Xilinx
                 Spartan-6 FPGA. Finally, we analyse the scheme and our
                 implementation according to performance and the
                 requirements listed.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cherif:2019:LSD,
  author =       "Amina Cherif and Malika Belkadi and Damien Sauveron",
  title =        "A Lightweight and Secure Data Collection Serverless
                 Protocol Demonstrated in an Active {RFIDs} Scenario",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "27:1--27:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3274667",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3274667",
  abstract =     "In the growing Internet of Things context, thousands
                 of computing devices with various functionalities are
                 producing data (from environmental sensors or other
                 sources). However, they are also collecting, storing,
                 processing and transmitting data to eventually
                 communicate them securely to third parties (e.g.,
                 owners of devices or cloud data storage). The deployed
                 devices are often battery-powered mobile or static
                 nodes equipped with sensors and/or actuators, and they
                 communicate using wireless technologies. Examples
                 include unmanned aerial vehicles, wireless sensor
                 nodes, smart beacons, and wearable health objects. Such
                 resource-constrained devices include Active Radio
                 Frequency IDentification (RFID) nodes, and these are
                 used to illustrate our proposal. In most scenarios,
                 these nodes are unattended in an adverse environment,
                 so data confidentiality must be ensured from the
                 sensing phase through to delivery to authorized
                 entities: in other words, data must be securely stored
                 and transmitted to prevent attack by active adversaries
                 even if the nodes are captured. However, due to the
                 scarce resources available to nodes in terms of energy,
                 storage, and/or computation, the proposed security
                 solution has to be lightweight. In this article, we
                 propose a serverless protocol to enable Mobile Data
                 Collectors (MDCs), such as drones, to securely collect
                 data from mobile and static Active RFID nodes and then
                 deliver them later to an authorized third party. The
                 whole solution ensures data confidentiality at each
                 step (from the sensing phase, before data collection by
                 the MDC, once data have been collected by MDC, and
                 during final delivery), while fulfilling the
                 lightweight requirements for the resource-limited
                 entities involved. To assess the suitability of the
                 protocol against the performance requirements, it was
                 implemented on the most resource-constrained devices to
                 get the worst possible results. In addition, to prove
                 the protocol fulfills the security requirements, it was
                 analyzed using security games and also formally
                 verified using the AVISPA and ProVerif tools.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhou:2019:LCP,
  author =       "Lu Zhou and Chunhua Su and Kuo-Hui Yeh",
  title =        "A Lightweight Cryptographic Protocol with
                 Certificateless Signature for the {Internet of
                 Things}",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "28:1--28:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301306",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301306",
  abstract =     "The universality of smart-devices has brought rapid
                 development and the significant advancement of
                 ubiquitous applications for the Internet of Things
                 (IoT). Designing new types of IoT-compatible
                 cryptographic protocols has become a more popular way
                 to secure IoT-based applications. Significant attention
                 has been dedicated to the challenge of implementing a
                 lightweight and secure cryptographic protocol for IoT
                 devices. In this study, we propose a lightweight
                 cryptographic protocol integrating certificateless
                 signature and bilinear pairing crypto-primitives. In
                 the proposed protocol, we elegantly refine the
                 processes to account for computation-limited IoT
                 devices during security operations. Rigorous security
                 analyses are conducted to guarantee the robustness of
                 the proposed cryptographic protocol. In addition, we
                 demonstrate a thorough performance evaluation, where an
                 IoT-based test-bed, i.e., the Raspberry PI, is
                 simulated as the underlying platform of the
                 implementation of our proposed cryptographic protocol.
                 The results show the practicability of the proposed
                 protocol.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sha:2019:CED,
  author =       "Le-Tian Sha and Fu Xiao and Hai-Ping Huang and Yu Chen
                 and Ru-Chuan Wang",
  title =        "Catching Escapers: a Detection Method for Advanced
                 Persistent Escapers in Industry {Internet of Things}
                 Based on Identity-based Broadcast Encryption {(IBBE)}",
  journal =      j-TECS,
  volume =       "18",
  number =       "3",
  pages =        "29:1--29:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3319615",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3319615",
  abstract =     "As the Industry 4.0 or Internet of Things (IoT) era
                 begins, security plays a key role in the Industry
                 Internet of Things (IIoT) due to various threats, which
                 include escape or Distributed Denial of Service (DDoS)
                 attackers in the virtualization layer and vulnerability
                 exploiters in the device layer. A successful cross-VM
                 escape attack in the virtualization layer combined with
                 cross-layer penetration in the device layer, which we
                 define as an Advanced Persistent Escaper (APE), poses a
                 great threat. Therefore, the development of detection
                 and rejection methods for APEs across multiple layers
                 in IIoT is an open issue. To the best of our knowledge,
                 less effective methods are established, especially for
                 vulnerability exploitation in the virtualization layer
                 and backdoor leverage in the device layer. On the basis
                 of this, we propose Escaper Cops (EscaperCOP), a
                 detection method for cross-VM escapers in the
                 virtualization layer and cross-layer penetrators in the
                 device layer. In particular, a new detection method for
                 guest-to-host escapers is proposed for the
                 virtualization layer. Finally, a novel encryption
                 method based on Identity-based Broadcast Encryption
                 (IBBE) is proposed to protect the critical components
                 in EscaperCOP, detection library, and control command
                 library. To verify our method, experimental tests are
                 performed for a large number of APEs in an IIoT
                 framework. The test results have demonstrated the
                 proposed method is effective with an acceptable level
                 of detection ratio.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ahmed:2019:OPM,
  author =       "Rehan Ahmed and Bernhard Buchli and Stefan Draskovic
                 and Lukas Sigrist and Pratyush Kumar and Lothar
                 Thiele",
  title =        "Optimal Power Management with Guaranteed Minimum
                 Energy Utilization for Solar Energy Harvesting
                 Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "30:1--30:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3317679",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3317679",
  abstract =     "In this work, we present a formal study on optimizing
                 the energy consumption of energy harvesting embedded
                 systems. To deal with the uncertainty inherent in solar
                 energy harvesting systems, we propose the Stochastic
                 Power Management (SPM) scheme, which builds statistical
                 models of harvested energy based on historical data.
                 The proposed stochastic scheme maximizes the lowest
                 energy consumption across all time intervals while
                 giving strict probabilistic guarantees on not
                 encountering battery depletion. For situations where
                 historical data is not available, we propose the use of
                 (i) a Finite Horizon Control (FHC) scheme and (ii) a
                 non-uniformly scaled energy estimator based on an
                 astronomical model, which is used by FHC. Under certain
                 realistic assumptions, the FHC scheme can provide
                 guarantees on minimum energy usage that can be
                 supported over all times. We further propose and
                 evaluate a piece-wise linear approximation of FHC for
                 efficient implementation in resource-constrained
                 embedded systems. With extensive experimental
                 evaluation for eight publicly available datasets and
                 two datasets collected with our own deployments, we
                 quantitatively establish that the proposed solutions
                 are highly effective at providing a guaranteed minimum
                 service level and significantly outperform existing
                 solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shukla:2019:EAR,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Adversaries and Robustness",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "30e:1--30e:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3345556",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3345556",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2019:CDM,
  author =       "Daibo Liu and Zhichao Cao and Mingyan Liu and Mengshu
                 Hou and Hongbo Jiang",
  title =        "Contention-Detectable Mechanism for Receiver-Initiated
                 {MAC}",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "31:1--31:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3317683",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3317683",
  abstract =     "The energy efficiency and delivery robustness are two
                 critical issues for low duty-cycled wireless sensor
                 networks. The asynchronous receiver-initiated
                 duty-cycling media access control (MAC) protocols have
                 shown their effectiveness through various studies. In
                 receiver-initiated MACs, packet transmission is
                 triggered by the probe of receiver. However, it suffers
                 from the performance degradation incurred by packet
                 collision, especially under bursty traffic. Several
                 protocols have been proposed to address this problem,
                 but their performance is restricted by the unnecessary
                 backoff time and long negotiation process. In this
                 article, we present CD-MAC, an energy-efficient and
                 robust contention-detectable mechanism for addressing
                 the collision-catching problem in receiver-initiated
                 MACs. By exploring the temporal diversity of the
                 acknowledgments, a receiver recognizes the potential
                 senders and subsequently polls individual senders one
                 by one. On that basis, CD-MAC can successfully avoid
                 packet collision even though multiple senders have data
                 packets to transmit to the same receiver. We implement
                 CD-MAC in TinyOS and evaluate its performance on an
                 indoor testbed with single-hop and multi-hop network
                 scenarios. The results show that CD-MAC can
                 significantly improve throughput by 1.72 times compared
                 with the state-of-the-art receiver-initiated MAC
                 protocol under bursty traffic loads. The results also
                 demonstrate that CD-MAC can effectively mitigate the
                 influence of hidden terminal problem and adapt to
                 network dynamics well.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2019:NNA,
  author =       "Xiaokang Wang and Laurence T. Yang and Hongguo Li and
                 Man Lin and Jianjun Han and Bernady O. Apduhan",
  title =        "{NQA}: a Nested Anti-collision Algorithm for {RFID}
                 Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "32:1--32:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3330139",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3330139",
  abstract =     "Radio frequency identification (RFID) systems, as one
                 of the key components in the Internet of Things (IoT),
                 have attracted much attention in the domains of
                 industry and academia. In practice, the performance of
                 RFID systems rather relies on the effectiveness and
                 efficiency of anti-collision algorithms. A large body
                 of studies have recently focused on the anti-collision
                 algorithms, such as the Q-algorithm (QA), which has
                 been successfully utilized in EPCglobal Class-1
                 Generation-2 protocol. However, the performance of
                 those anti-collision algorithms needs to be further
                 improved. Observe that fully exploiting the
                 pre-processing time can improve the efficiency of the
                 QA algorithm. With an objective of improving the
                 performance for anti-collision, we propose a Nested
                 Q-algorithm (NQA), which makes full use of such
                 pre-processing time and incorporates the advantages of
                 both Binary Tree (BT) algorithm and QA algorithm.
                 Specifically, based on the expected number of collision
                 tags, the NQA algorithm can adaptively select either BT
                 or QA to identify collision tags. Extensive simulation
                 results validate the efficiency and effectiveness of
                 our proposed NQA (i.e., less running time for
                 processing the same number of active tags) when
                 compared to the existing algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Su:2019:TFR,
  author =       "Fang Su and Yongpan Liu and Xiao Sheng and Hyung Gyu
                 Lee and Naehyuck Chang and Huazhong Yang",
  title =        "A Task Failure Rate Aware Dual-Channel Solar Power
                 System for Nonvolatile Sensor Nodes",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "33:1--33:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3320270",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3320270",
  abstract =     "In line with the rapid development of the Internet of
                 Things (IoT), the maintenance of on-board batteries for
                 a trillion sensor nodes has become prohibitive both in
                 time and costs. Energy harvesting is a promising
                 solution to this problem. However, conventional
                 energy-harvesting systems with storage suffer from low
                 efficiency because of conversion loss and storage
                 leakage. Direct supply systems without energy buffer
                 provide higher efficiency, but fail to satisfy quality
                 of service (QoS) due to mismatches between input power
                 and workloads. Recently, a novel dual-channel
                 photovoltaic power system has paved the way to achieve
                 both high energy efficiency and QoS guarantee. This
                 article focuses on the design-time and run-time
                 co-optimization of the dual-channel solar power system.
                 At the design stage, we develop a task failure rate
                 estimation framework to balance design costs and
                 failure rate. At run-time, we propose a task failure
                 rate aware QoS tuning algorithm to further enhance
                 energy efficiency. Through the experiments on both a
                 simulation platform and a prototype board, this study
                 demonstrates a 27\% task failure rate reduction
                 compared with conventional architectures with identical
                 design costs. And the proposed online QoS tuning
                 algorithm brings up to 30\% improvement in energy
                 efficiency with nearly zero failure rate penalty.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ponugoti:2019:EFH,
  author =       "Mounika Ponugoti and Aleksandar Milenkovic",
  title =        "Enabling On-the-Fly Hardware Tracing of Data Reads in
                 Multicores",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "34:1--34:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3322642",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3322642",
  abstract =     "Software debugging is one of the most challenging
                 aspects of embedded system development due to growing
                 hardware and software complexity, limited visibility of
                 system components, and tightening time-to-market. To
                 find software bugs faster, developers often rely on
                 on-chip trace modules with large buffers to capture
                 program execution traces with minimum interference with
                 program execution. However, the high volumes of trace
                 data and the high cost of trace modules limit the
                 visibility into the system operation to short program
                 segments. This article introduces a new
                 hardware/software technique for capturing and filtering
                 read data value traces in multicores that enables a
                 complete reconstruction of parallel program execution.
                 The proposed technique exploits tracking of data reads
                 in data caches and cache coherence protocol states to
                 minimize the number of trace messages streamed out of
                 the target platform to the software debugger. The
                 effectiveness of the proposed technique is determined
                 by analyzing the required trace port bandwidth and
                 trace buffer sizes as a function of the data cache size
                 and the number of processor cores. The results show
                 that the proposed technique significantly reduces the
                 required trace port bandwidth, from 12.2 to 73.9 times,
                 when compared to the Nexus-like read data value
                 tracing, thus enabling continuous on-the-fly data
                 tracing at modest hardware cost.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Al-bayati:2019:PSD,
  author =       "Zaid Al-bayati and Youcheng Sun and Haibo Zeng and
                 Marco {Di Natale} and Qi Zhu and Brett H. Meyer",
  title =        "Partitioning and Selection of Data Consistency
                 Mechanisms for Multicore Real-Time Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "35:1--35:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3320271",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3320271",
  abstract =     "Multicore platforms are becoming increasingly popular
                 in real-time systems. One of the major challenges in
                 designing multicore real-time systems is ensuring
                 consistent and timely access to shared resources.
                 Lock-based protection mechanisms such as MPCP and MSRP
                 have been proposed to guarantee mutually exclusive
                 access in multicore systems at the expense of blocking.
                 In this article, we consider partitioning and
                 scheduling in multicore real-time systems with resource
                 sharing. We first propose a resource-aware task
                 partitioning algorithm for systems with lock-based
                 protection. Wait-free methods, which ensure consistent
                 access to shared memory resources with negligible
                 blocking at the expense of additional memory space, are
                 a suitable alternative when the shared resource is a
                 communication buffer. We propose several approaches to
                 solve the joint problem of task partitioning and the
                 selection of a data consistency mechanism (lock-based
                 or wait-free). The problem is first formulated as an
                 Integer Linear Programming (ILP). For large systems
                 where an ILP solution is not scalable, we propose two
                 heuristic algorithms. Experimental results compare the
                 effectiveness of the proposed approaches in finding
                 schedulable systems with low memory cost and show how
                 the use of wait-free methods can significantly improve
                 schedulability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Desirena-Lopez:2019:TAR,
  author =       "G. Desirena-L{\'o}pez and A. Ram{\'\i}rez-Trevi{\~n}o
                 and J. L. Briz and C. R. V{\'a}zquez and D.
                 G{\'o}mez-Guti{\'e}rrez",
  title =        "Thermal-aware Real-time Scheduling Using Timed
                 Continuous {Petri} Nets",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "36:1--36:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3322643",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3322643",
  abstract =     "We present a thermal-aware, hard real-time (HRT)
                 global scheduler for a multiprocessor system designed
                 upon three novel techniques. First, we present a
                 modeling methodology based on Timed Continuous Petri
                 nets (TCPN) that yields a complete state variable
                 model, including job arrivals, CPU usage, power, and
                 thermal behavior. The model is accurate and avoids the
                 calibration stage of RC thermal models. Second, based
                 on this model, a linear programming problem (LPP)
                 determines the existence of a feasible HRT
                 thermal-aware schedule. Last, a sliding-mode controller
                 and an online discretization algorithm implement the
                 global HRT scheduler, which is capable of managing
                 thermal constraints, context switching, migrations, and
                 disturbances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ruaro:2019:SAQ,
  author =       "Marcelo Ruaro and Axel Jantsch and Fernando Gehm
                 Moraes",
  title =        "Self-Adaptive {QoS} Management of Computation and
                 Communication Resources in Many-Core {SoCs}",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "37:1--37:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3328755",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3328755",
  abstract =     "Providing quality of service (QoS) for many-core
                 systems with dynamic application admission is
                 challenging due to the high amount of resources to
                 manage and the unpredictability of computation and
                 communication events. Related works propose a
                 self-adaptive QoS mechanism concerned either in
                 communication or computation resources, lacking,
                 however, a comprehensive QoS management of both.
                 Assuming a many-core system with QoS monitoring,
                 runtime circuit-switching establishment, task
                 migration, and a soft real-time task scheduler, this
                 work fills this gap by proposing a novel self-adaptive
                 QoS management. The contribution of this proposal comes
                 with the following features in the QoS management: (i)
                 comprehensiveness, by covering communication and
                 computation resources; (ii) online, adopting the ODA
                 (Observe, Decide, Act) runtime closed-loop adaptation;
                 and (iii) reactive and proactive decisions, by using a
                 dynamic application profile extraction technique, which
                 enables the QoS management to be aware of the profile
                 of running applications, allowing it to take proactive
                 decisions based on a prediction analysis. The proposed
                 QoS management adopts a decentralized organization by
                 partitioning the system in clusters, each one managed
                 by a dedicated processor, making the proposal scalable.
                 Results show that the proactive feature accurately
                 extracts the applications' profile, and can prevent
                 future QoS violations. The synergy of reactive and
                 proactive decisions was able to sustain QoS, reducing
                 the deadline miss rate by 99.5\% with a severe
                 disturbance in communication and computation levels,
                 and avoiding deadline misses up to 70\% of system
                 utilization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ali:2019:CCT,
  author =       "G. G. Md. Nawaz Ali and Md. Noor-A-Rahim and Md.
                 Ashiqur Rahman and Beshah Ayalew and Peter H. J. Chong
                 and Yong Liang Guan",
  title =        "Cooperative Cache Transfer-based On-demand Network
                 Coded Broadcast in Vehicular Networks",
  journal =      j-TECS,
  volume =       "18",
  number =       "4",
  pages =        "38:1--38:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329865",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:43 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3329865",
  abstract =     "Real-time traffic updates, safety and comfort driving,
                 infotainment, and so on, are some envisioned
                 applications in vehicular networks. Unlike traditional
                 broadcast, network-coding-assisted broadcast can
                 satisfy multiple vehicles with different data items in
                 a coded form. However, server side encoding requires
                 the prior knowledge about vehicles' cache information
                 for the successful decoding at the vehicles' sides. The
                 explicit cache upload from vehicles to Road Side Unit
                 (RSU) wastes upload bandwidth. In multi-RSU vehicular
                 networks, we propose a Cooperative Cache Transfer-based
                 On-demand Network Coded Broadcast called CCTCB. In the
                 proposed CCTCB approach, vehicles do not need to upload
                 their cache information to the server, rather the RSU
                 server learns the vehicles' cache intrinsically. We
                 derive a probabilistic model to analyze the coding
                 opportunity in the proposed cooperative cache transfer
                 mechanism incorporating vehicle mobility. The
                 comprehensive simulation results validate the
                 superiority of the proposed approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2019:OIW,
  author =       "Yu-Chieh Chen and Ching-Chih Chang and Ramesh Perumal
                 and Shih-Rung Yeh and Yen-Chung Chang and Hsin Chen",
  title =        "Optimization and Implementation of Wavelet-based
                 Algorithms for Detecting High-voltage Spindles in
                 Neuron Signals",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "39:1--39:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3329864",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3329864",
  abstract =     "This article presents a microcontroller unit (MCU)
                 based simplified discrete wavelet transform (Sim-DWT)
                 algorithm that can detect high-voltage spindles (HVSs)
                 in local field potential (LFP) signals. The Sim-DWT
                 algorithm operates in an 8-bit MCU, 8 MHz operating
                 clock and 16 sample points of buffers to detect HVSs
                 with a frequency range of 5--15 Hz. The requirement of
                 only sixteen 8-bit sample points as the window length
                 for calculation and no need for a multiplier render the
                 Sim-DWT easy to implement in an MCU with limited
                 hardware resources. The Sim-DWT is applied in an 8-bit
                 MCU with 6 mW power consumption (including IO ports) and
                 was tested for detecting LFP signals in vivo. The
                 design methods and the accuracy of three typical types
                 of mother wavelet functions (Haar, DB4, Morlet) in the
                 Sim-DWT were also tested and compared with those of a
                 PC-based system. The experimental results showed that
                 with appropriately designed cMW functions in the
                 Sim-DWT, HVSs could be detected more accurately than
                 they could be in PC-based software. The present study
                 indicates that the optimized HVS detector (Sim-DWT) can
                 be implemented in an 8-bit MCU with limited hardware
                 resources and is suitable to serve as the digital core
                 in a closed-loop deep brain stimulator microsystem in
                 the future.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Noltsis:2019:CLC,
  author =       "Michail Noltsis and Nikolaos Zambelis and Francky
                 Catthoor and Dimitrios Soudris",
  title =        "A Closed-Loop Controller to Ensure Performance and
                 Temperature Constraints for Dynamic Applications",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "40:1--40:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3343030",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3343030",
  abstract =     "To secure correct system operation, a plethora of
                 Reliability, Availability and Serviceability (RAS)
                 techniques have been deployed by circuit designers. RAS
                 mechanisms however, come with the cost of extra clock
                 cycles. In addition, a wide variety of dynamic
                 workloads and different input conditions often
                 constitute preemptive dependability techniques hard to
                 implement. To this end, we focus on a realistic case
                 study of a closed-loop controller that mitigates
                 performance variation with a reactive response. This
                 concept has been discussed but was only illustrated on
                 small benchmarks. In particular, the extension of the
                 approach to manage performance of dynamic workloads on
                 a target platform has not been shown earlier. We
                 compare our scheme against the version of a Linux CPU
                 frequency governor in terms of timing response and
                 energy consumption. Finally, we move forward and
                 suggest a new flavor of our controller to efficiently
                 manage processor temperature. Again, the concept is
                 illustrated with a realistic case study and compared to
                 a modern temperature manager.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Durrieu:2019:GAC,
  author =       "Guy Durrieu and Claire Pagetti",
  title =        "{GRec}: Automatic Computation of Reconfiguration
                 Graphs for Multi-core Platforms",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "41:1--41:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3350533",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3350533",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhu:2019:SEA,
  author =       "Siwen Zhu and Yi Tang and Junxiang Zheng and Yongzhi
                 Cao and Hanpin Wang and Yu Huang and Marian Margraf",
  title =        "Sample Essentiality and Its Application to Modeling
                 Attacks on Arbiter {PUFs}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "42:1--42:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3344148",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3344148",
  abstract =     "Physically Unclonable Functions (PUFs), as an
                 alternative hardware-based security method, have been
                 challenged by some modeling attacks. As is known to
                 all, samples are significant in modeling attacks on
                 PUFs, and thus, some efforts have been made to expand
                 sample sets therein to improve modeling attacks. A
                 closer examination, however, reveals that not all
                 samples contribute to modeling attacks equally.
                 Therefore, in this article, we introduce the concept of
                 sample essentiality for describing the contribution of
                 a sample in modeling attacks and point out that any
                 sample without sample essentiality cannot enhance some
                 modeling attacks on PUFs. As a by-product, we find
                 theoretically and empirically that the samples expanded
                 by the procedures proposed by Chatterjee et al. do not
                 satisfy our sample essentiality. Furthermore, we
                 propose the notion of essential sample sets for
                 datasets and discuss its basic properties. Finally, we
                 demonstrate that our results about sample essentiality
                 can be used to reduce samples efficiently and benefit
                 sample selection in modeling attacks on arbiter PUFs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Strobel:2019:PMA,
  author =       "Manuel Strobel and Martin Radetzki",
  title =        "Power-mode-aware Memory Subsystem Optimization for
                 Low-power System-on-Chip Design",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "43:1--43:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3356583",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3356583",
  abstract =     "The memory subsystem is increasingly subject to an
                 intensive energy minimization effort in embedded and
                 System-on-Chip development. While the main focus is
                 typically put on energy consumption reduction, there
                 are other optimization aspects that become more and
                 more relevant as well, e.g., peak power constraints or
                 time budgets. In this regard, the present article makes
                 the following contributions. Taking industrial-grade
                 information into account, different Static
                 Random-Access Memory (SRAM) power modes and their
                 characteristics are presented at first. Using this
                 information, a comprehensive optimization model with
                 the main intention of energy minimization is defined.
                 It is based on memory access statistics that represent
                 the embedded software of interest, which allows for
                 application-tailored improvements. Further, it
                 considers different power states of the memory
                 subsystem and enables the definition of peak power and
                 time corridor constraints. The presented two-stage
                 implementation of this optimization model allows the
                 handling of large design spaces. Clearly defined
                 interfaces facilitate the exchange of individual
                 workflow parts in a plug-and-play fashion and further
                 enable a neat integration of our optimization method
                 with existing hardware/software (HW/SW) codesign
                 synthesis flows. A general evaluation for different
                 technology nodes yields that the optimization potential
                 of memory low-power modes increases with advancing
                 miniaturization but also depends on the data footprint
                 of the embedded software. Experimental results for a
                 set of benchmark applications confirm these findings
                 and provide energy savings of up to 90\% and over 60\%
                 on average compared to a monolithic memory layout
                 without low-power modes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Motamedi:2019:DNA,
  author =       "Mohammad Motamedi and Felix A. Portillo and Daniel
                 Fong and Soheil Ghiasi",
  title =        "{Distill-Net}: Application-Specific Distillation of
                 Deep Convolutional Neural Networks for
                 Resource-Constrained {IoT} Platforms",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "44:1--44:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3360512",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3360512",
  abstract =     "Many Internet-of-Things (IoT) applications demand fast
                 and accurate understanding of a few key events in their
                 surrounding environment. Deep Convolutional Neural
                 Networks (CNNs) have emerged as an effective approach
                 to understand speech, images, and similar
                 high-dimensional data types. Algorithmic performance of
                 modern CNNs, however, fundamentally relies on learning
                 class-agnostic hierarchical features that only exist in
                 comprehensive training datasets with many classes. As a
                 result, fast inference using CNNs trained on such
                 datasets is prohibitive for most resource-constrained
                 IoT platforms. To bridge this gap, we present a
                 principled and practical methodology for distilling a
                 complex modern CNN that is trained to effectively
                 recognize many different classes of input data into an
                 application-dependent essential core that not only
                 recognizes the few classes of interest to the
                 application accurately but also runs efficiently on
                 platforms with limited resources. Experimental results
                 confirm that our approach strikes a favorable balance
                 between classification accuracy (application
                 constraint), inference efficiency (platform
                 constraint), and productive development of new
                 applications (business constraint).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhou:2019:RTA,
  author =       "Quan Zhou and Guohui Li and Jianjun Li and Chenggang
                 Deng and Ling Yuan",
  title =        "Response Time Analysis for Tasks with Fixed Preemption
                 Points under Global Scheduling",
  journal =      j-TECS,
  volume =       "18",
  number =       "5",
  pages =        "111:1--111:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3360513",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3360513",
  abstract =     "As an effective method for detecting the
                 schedulability of real-time tasks on multiprocessor
                 platforms, Response time analysis (RTA) has been deeply
                 researched in recent decades. Most of the existing RTA
                 methods are designed for tasks that can be preempted at
                 any time. However, in some real-time systems, a task
                 may have some fixed preemption points (FPPs) that
                 divide its execution into a series of non-preemptive
                 regions (NPRs). In such environments, the task can only
                 be preempted at its FPPs, which makes existing RTA
                 methods for arbitrary preemption tasks not applicable.
                 In this article, we study the schedulability analysis
                 on tasks with FPPs under both global fixed-priority
                 (G-FP) scheduling and global earliest deadline first
                 (G-EDF) scheduling. First, based on the idea of
                 limiting the time interval between two consecutive
                 executions of an NPR, a novel RTA method for tasks with
                 FPPs under G-FP scheduling is proposed. Second, we
                 propose an effective RTA method for tasks with FPPs
                 under G-EDF scheduling. Finally, extensive simulations
                 are conducted and the results validate the
                 effectiveness of the proposed methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yu:2019:TND,
  author          = {Jiecao Yu and Andrew Lukefahr and Reetuparna Das and
                     Scott Mahlke},
  title           = {{TF-Net}: Deploying Sub-Byte Deep Neural Networks on
                     Microcontrollers},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {45},
  pages           = {45:1--45:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358189},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358189},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Deep Neural Networks (DNNs) have become an essential
                     component of various applications. While today's DNNs
                     are mainly restricted to cloud services, network
                     connectivity, energy, and data privacy problems make it
                     important to support efficient DNN computation on
                     low-cost, low-power processors like microcontrollers.
                     However, due to the constrained computation resources,
                     it is challenging to execute large DNN models on
                     microcontrollers. Using sub-byte low-precision input
                     activations and weights is a typical method to reduce
                     DNN computation. But on byte-addressable
                     microcontrollers, the sub-byte computation is not well
                     supported. The sub-byte inputs and weights need to be
                     unpacked from bitstreams before computation, which
                     incurs significant computation and energy overhead. In
                     this paper, we propose the TF-Net pipeline to
                     efficiently deploy sub-byte DNNs on microcontrollers.
                     While TF-Net allows for a range of weight and input
                     precision, we find Ternary weights and Four-bit inputs
                     provide the optimal balance between model accuracy,
                     computation performance, and energy efficiency. TF-Net
                     first includes a training framework for sub-byte
                     low-precision DNN models. Two algorithms are then
                     introduced to accelerate the trained models. The first,
                     direct buffer convolution, amortizes unpacking overhead
                     by caching unpacked inputs. The second, packed sub-byte
                     multiply-accumulate, utilizes a single multiplication
                     instruction to perform multiple sub-byte
                     multiply-accumulate computations. To further accelerate
                     DNN computation, we propose two instructions,
                     Multiply-Shift-Accumulate and Unpack, to extend the
                     existing microcontroller instruction set. On the tested
                     networks, TF-Net can help improve the computation
                     performance and energy efficiency by $ 1.83 \times $
                     and $ 2.28 \times $ on average, respectively.},
}

@Article{Goncalves:2019:AER,
  author          = {Larissa Rozales Gon{\c{c}}alves and Rafael F{\~a}o {De
                     Moura} and Luigi Carro},
  title           = {Aggressive Energy Reduction for Video Inference with
                     Software-only Strategies},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {46},
  pages           = {46:1--46:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358174},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358174},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {In the past years, several works have proposed custom
                     hardware and software-based techniques for the
                     acceleration of Convolutional Neural Networks (CNNs).
                     Most of these works focus on saving computations by
                     changing the used precision or modifying frame
                     processing. To reach a more aggressive energy
                     reduction, in this paper we propose software-only
                     modifications to the CNNs inference process. Our
                     approach exploits the inherent locality in videos by
                     replacing entire frame computations with a movement
                     prediction algorithm. Furthermore, when a frame must be
                     processed, we avoid energy-demanding floating-point
                     operations, and at the same time reduce memory accesses
                     by employing look-up tables in place of the original
                     convolutions. Using the proposed approach, one can
                     reach significant energy gains of more than $ 25 \times
                     $ for security cameras, and $ 12 \times $ for moving
                     vehicles applications, with only small software
                     modifications.},
}

@Article{Zhang:2019:CCL,
  author          = {Jeff (Jun) Zhang and Parul Raj and Shuayb Zarar and
                     Amol Ambardekar and Siddharth Garg},
  title           = {{CompAct}: On-chip Compression of Activations for Low
                     Power Systolic Array Based {CNN} Acceleration},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {47},
  pages           = {47:1--47:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358178},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358178},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {This paper addresses the design of systolic array (SA)
                     based convolutional neural network (CNN) accelerators
                     for mobile and embedded domains. On- and off-chip
                     memory accesses to the large activation inputs
                     (sometimes called feature maps) of CNN layers
                     contribute significantly to total energy consumption
                     for such accelerators; while prior has proposed
                     off-chip compression, activations are still stored
                     on-chip in uncompressed form, requiring either large
                     on-chip activation buffers or slow and energy-hungry
                     off-chip accesses. In this paper, we propose CompAct, a
                     new architecture that enables on-chip compression of
                     activations for SA based CNN accelerators. CompAct is
                     built around several key ideas. First, CompAct
                     identifies an SA schedule that has nearly regular
                     access patterns, enabling the use of a modified
                     run-length coding scheme (RLC). Second, CompAct
                     improves compression ratio of the RLC scheme using
                     Sparse-RLC in later CNN layers and Lossy-RLC in earlier
                     layers. Finally, CompAct proposes look-ahead snoozing
                     that operates synergistically with RLC to reduce the
                     leakage energy of activation buffers. Based on detailed
                     synthesis results, we show that CompAct enables up to
                     62\% reduction in activation buffer energy, and 34\%
                     reduction in total chip energy.},
}

@Article{Castro-Godinez:2019:EBE,
  author          = {Jorge Castro-God{\'\i}nez and Muhammad Shafique and
                     J{\"o}rg Henkel},
  title           = {{ECAx}: Balancing Error Correction Costs in
                     Approximate Accelerators},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {48},
  pages           = {48:1--48:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358179},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358179},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Approximate computing has emerged as a design paradigm
                     amenable to error-tolerant applications. It enables
                     trading the quality of results for efficiency
                     improvement in terms of delay, power, and energy
                     consumption under user-provided tolerable quality
                     degradation. Approximate accelerators have been
                     proposed to expedite frequently executing code sections
                     of error-resilient applications while meeting a defined
                     quality level. However, these accelerators may produce
                     unacceptable errors at run time if the input data
                     changes or dynamic adjustments are made for a defined
                     output quality constraint. State-of-the-art approaches
                     in approximate computing address this issue by
                     correctly re-computing those accelerator invocations
                     that produce unacceptable errors; this is achieved by
                     using the host processor or an alternate exact
                     accelerator, which is activated on-demand.
                     Nevertheless, such approaches can nullify the benefits
                     of approximate computing, especially when input data
                     variations are high at run time and errors due to
                     approximations are above a tolerable threshold. As a
                     robust and general solution to this problem, we propose
                     ECAx, a novel methodology to explore low-overhead error
                     correction in approximate accelerators by selectively
                     correcting most significant errors, in terms of their
                     magnitude, without losing the gains of approximations.
                     We particularly consider the case of approximate
                     accelerators built with approximate functional units
                     such as approximate adders. Our novel methodology
                     reduces the required exact re-computations on the host
                     processor, achieving up to 20\% performance gain
                     compared to state-of-the-art approaches.},
}

@Article{Bhat:2019:ULE,
  author          = {Ganapati Bhat and Yigit Tuncel and Sizhe An and Hyung
                     Gyu Lee and Umit Y. Ogras},
  title           = {An Ultra-Low Energy Human Activity Recognition
                     Accelerator for Wearable Health Applications},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {49},
  pages           = {49:1--49:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358175},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358175},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Human activity recognition (HAR) has recently received
                     significant attention due to its wide range of
                     applications in health and activity monitoring. The
                     nature of these applications requires mobile or
                     wearable devices with limited battery capacity. User
                     surveys show that charging requirement is one of the
                     leading reasons for abandoning these devices. Hence,
                     practical solutions must offer ultra-low power
                     capabilities that enable operation on harvested energy.
                     To address this need, we present the first fully
                     integrated custom hardware accelerator (HAR engine)
                     that consumes 22.4 $ \mu $J per operation using a
                     commercial 65 nm technology. We present a complete
                     solution that integrates all steps of HAR, i.e.,
                     reading the raw sensor data, generating features, and
                     activity classification using a deep neural network
                     (DNN). It achieves 95\% accuracy in recognizing 8
                     common human activities while providing three orders of
                     magnitude higher energy efficiency compared to existing
                     solutions.},
}

@Article{Wijerathne:2019:CHT,
  author          = {Dhananjaya Wijerathne and Zhaoying Li and Manupa
                     Karunarathne and Anuj Pathania and Tulika Mitra},
  title           = {{CASCADE}: High Throughput Data Streaming via
                     Decoupled Access-Execute {CGRA}},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {50},
  pages           = {50:1--50:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358177},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358177},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {A Coarse-Grained Reconfigurable Array (CGRA) is a
                     promising high-performance low-power accelerator for
                     compute-intensive loop kernels. While the mapping of
                     the computations on the CGRA is a well-studied problem,
                     bringing the data into the array at a high throughput
                     remains a challenge. A conventional CGRA design
                     involves on-array computations to generate memory
                     addresses for data access undermining the attainable
                     throughput. A decoupled access-execute architecture, on
                     the other hand, isolates the memory access from the
                     actual computations resulting in a significantly higher
                     throughput. We propose a novel decoupled access-execute
                     CGRA design called CASCADE with full architecture and
                     compiler support for high-throughput data streaming
                     from an on-chip multi-bank memory. CASCADE offloads the
                     address computations for the multi-bank data memory
                     access to a custom designed programmable hardware. An
                     end-to-end fully-automated compiler synchronizes the
                     conflict-free movement of data between the memory banks
                     and the CGRA. Experimental evaluations show on average
                     $ 3 \times $ performance benefit and $ 2.2 \times $
                     performance per watt improvement for CASCADE compared
                     to an iso-area conventional CGRA with a bigger
                     processing array in lieu of a dedicated hardware memory
                     address generation logic.},
}

@Article{Restuccia:2019:YBA,
  author          = {Francesco Restuccia and Marco Pagani and Alessandro
                     Biondi and Mauro Marinoni and Giorgio Buttazzo},
  title           = {Is Your Bus Arbiter Really Fair? {Restoring} Fairness
                     in {AXI} Interconnects for {FPGA SoCs}},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {51},
  pages           = {51:1--51:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358183},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358183},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {AMBA AXI is a popular bus protocol that is widely
                     adopted as the medium to exchange data in
                     field-programmable gate array system-on-chips (FPGA
                     SoCs). The AXI protocol does not specify how
                     conflicting transactions are arbitrated and hence the
                     design of bus arbiters is left to the vendors that
                     adopt AXI. Typically, a round-robin arbitration is
                     implemented to ensure a fair access to the bus by the
                     master nodes, as for the popular SoCs by Xilinx. This
                     paper addresses a critical issue that can arise when
                     adopting the AXI protocol under round-robin
                     arbitration; specifically, in the presence of bus
                     transactions with heterogeneous burst sizes. First, it
                     is shown that a completely unfair bandwidth
                     distribution can be achieved under some configurations,
                     making possible to arbitrarily decrease the bus
                     bandwidth of a target master node. This issue poses
                     serious performance, safety, and security concerns.
                     Second, a low-latency (one clock cycle) module named
                     AXI burst equalizer (ABE) is proposed to restore
                     fairness. Our investigations and proposals are
                     supported by implementations and tests upon three
                     modern SoCs. Experimental results are reported to
                     confirm the existence of the issue and assess the
                     effectiveness of the ABE with bus traffic generators
                     and hardware accelerators from the Xilinx's IP
                     library.},
}

@Article{Mandal:2019:APM,
  author          = {Sumit K. Mandal and Raid Ayoub and Michael Kishinevsky
                     and Umit Y. Ogras},
  title           = {Analytical Performance Models for {NoCs} with Multiple
                     Priority Traffic Classes},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {52},
  pages           = {52:1--52:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358176},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358176},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Networks-on-chip (NoCs) have become the standard for
                     interconnect solutions in industrial designs ranging
                     from client CPUs to many-core chip-multiprocessors.
                     Since NoCs play a vital role in system performance and
                     power consumption, pre-silicon evaluation environments
                     include cycle-accurate NoC simulators. Long simulations
                     increase the execution time of evaluation frameworks,
                     which are already notoriously slow, and prohibit
                     design-space exploration. Existing analytical NoC
                     models, which assume fair arbitration, cannot replace
                     these simulations since industrial NoCs typically
                     employ priority schedulers and multiple priority
                     classes. To address this limitation, we propose a
                     systematic approach to construct priority-aware
                     analytical performance models using micro-architecture
                     specifications and input traffic. Our approach
                     decomposes the given NoC into individual queues with
                     modified service time to enable accurate and scalable
                     latency computations. Specifically, we introduce novel
                     transformations along with an algorithm that
                     iteratively applies these transformations to decompose
                     the queuing system. Experimental evaluations using real
                     architectures and applications show high accuracy of
                     97\% and up to $ 2.5 \times $ speedup in full-system
                     simulation.},
}

@Article{Song:2019:EEP,
  author          = {Shihao Song and Anup Das and Onur Mutlu and Nagarajan
                     Kandasamy},
  title           = {Enabling and Exploiting Partition-Level Parallelism
                     {(PALP)} in Phase Change Memories},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {53},
  pages           = {53:1--53:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358180},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358180},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Phase-change memory (PCM) devices have multiple banks
                     to serve memory requests in parallel. Unfortunately, if
                     two requests go to the same bank, they have to be
                     served one after another, leading to lower system
                     performance. We observe that a modern PCM bank is
                     implemented as a collection of partitions that operate
                     mostly independently while sharing a few global
                     peripheral structures, which include the sense
                     amplifiers (to read) and the write drivers (to write).
                     Based on this observation, we propose PALP, a new
                     mechanism that enables partition-level parallelism
                     within each PCM bank, and exploits such parallelism by
                     using the memory controller's access scheduling
                     decisions. PALP consists of three new contributions.
                     First, we introduce new PCM commands to enable
                     parallelism in a bank's partitions in order to resolve
                     the read-write bank conflicts, with no changes needed
                     to PCM logic or its interface. Second, we propose
                     simple circuit modifications that introduce a new
                     operating mode for the write drivers, in addition to
                     their default mode of serving write requests. When
                     configured in this new mode, the write drivers can
                     resolve the read-read bank conflicts, working jointly
                     with the sense amplifiers. Finally, we propose a new
                     access scheduling mechanism in PCM that improves
                     performance by prioritizing those requests that exploit
                     partition-level parallelism over other requests,
                     including the long outstanding ones. While doing so,
                     the memory controller also guarantees
                     starvation-freedom and the PCM's
                     running-average-power-limit (RAPL). We evaluate PALP
                     with workloads from the MiBench and SPEC CPU2017
                     Benchmark suites. Our results show that PALP reduces
                     average PCM access latency by 23\%, and improves
                     average system performance by 28\% compared to the
                     state-of-the-art approaches.},
}

@Article{Sridhar:2019:SEC,
  author =       "Aditya Sridhar and Mohamed Ibrahim and Krishnendu
                 Chakrabarty",
  title =        "Synterface: Efficient Chip-to-World Interfacing for
                 Flow-Based Microfluidic Biochips Using Pin-Count
                 Minimization",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "54:1--54:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358188",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358188",
  abstract =     "Flow-based microfluidic biochips can be used to
                 perform bioassays by manipulating a large number of
                 on-chip valves. These biochips are increasingly used
                 today for biomolecular recognition, single-cell
                 screening, and point-of-care disease diagnostics, and
                 design-automation solutions for flow-based
                 microfluidics enable the mapping and optimization of
                 biomolecular protocols and software-based valve control.
                 However, a key problem that has not received adequate
                 attention is chip-to-world interfacing, which requires
                 the use of off-chip control equipment to provide
                 control signals for the on-chip valves. This problem is
                 exacerbated by the increase in the number of valves as
                 chips get more complex. To address the interfacing
                 problem, we present an efficient pin-count minimization
                 (synthesis) problem, referred to as Synterface, which
                 uses on-chip microfluidic logic gates and optimization
                 based on concepts from linear algebra. We present
                 results to show that Synterface significantly reduces
                 pin-count and simplifies the external interface for
                 flow-based microfluidics.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "54",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kim:2019:OBI,
  author          = {Minsu Kim and Jeong-Keun Park and Sungyeol Kim and
                     Insu Yang and Hyunsoo Jung and Soo-Mook Moon},
  title           = {Output-based Intermediate Representation for
                     Translation of Test-pattern Program},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {55},
  pages           = {55:1--55:??},
  month           = oct,
  year            = {2019},
  DOI             = {https://doi.org/10.1145/3358186},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3358186},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {An Intermediate Representation (IR) used by compilers
                     is normally generated statically, as a result of
                     parsing or analyzing the source program. This paper
                     proposes a completely different type of IR, generated
                     as a result of running the source program, the
                     output-based IR. There is a practical translation
                     problem where such an IR is useful, in the domain of
                     test-pattern programs. Test-pattern programs run on ATE
                     (automatic test equipment), a special embedded system
                     to test semiconductors such as DRAMs. They generate a
                     pattern for each clock, a bit vector input to the pins
                     of the chip. One issue is that different ATEs require
                     different programming since each ATE manufacturer has
                     its own programming language. Nonetheless, we should be
                     able to test a memory chip on different ATEs as long as
                     they generate the same patterns with the same speed.
                     Therefore, a memory chipmaker wants to make a pattern
                     program portable across ATEs, to fully utilize their
                     ATE resources. One solution is translating between
                     pattern programs, for which we need an IR since there
                     are multiple source ATEs and target ATEs. Instead of a
                     conventional, static IR, we propose using the output
                     pattern itself as an IR. Since the pattern is
                     independent of ATEs and easily obtainable, the
                     output-based IR obviates designing a static IR
                     considering all ATE programming languages and hardware
                     differences. Moreover, we might synthesize a better
                     target program from the IR, more optimized to the
                     target ATE. However, the full pattern generated by a
                     product-level pattern program is huge, so we propose
                     using an IR of abbreviated patterns, annotated with the
                     repetition information obtained while executing the
                     source program. Our experimental results with
                     product-level pattern programs show that our approach
                     is feasible.},
}

%%% READY (REconfigurable Accelerator DeploY): fine-grained multithreading
%%% overlay framework for CPU--FPGA dataflow applications.  TECS 18(5s),
%%% article 56; ending page is the `??' placeholder.  Also listed in
%%% multithreading.bib (see bibsource).
@Article{Silva:2019:RFG,
  author =       "Lucas Bragan{\c{c}}a {Da Silva} and Ricardo Ferreira
                 and Michael Canesche and Marcelo M. Menezes and Maria
                 D. Vieira and Jeronimo Penha and Peter Jamieson and
                 Jos{\'e} Augusto M. Nacif",
  title =        "{READY}: a Fine-Grained Multithreading Overlay
                 Framework for Modern {CPU--FPGA} Dataflow
                 Applications",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "56:1--56:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358187",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358187",
  abstract =     "In this work, we propose a framework called
                 REconfigurable Accelerator DeploY (READY), the first
                 framework to support polynomial runtime mapping of
                 dataflow applications in high-performance CPU-FPGA
                 platforms. READY introduces an efficient mapping with
                 fine-grained multithreading onto an overlay
                 architecture that hides the latency of a global
                 interconnection network. In addition to our overlay
                 architecture, we show how this system helps solve some
                 of the challenges for FPGA cloud computing adoption in
                 high-performance computing. The framework encapsulates
                 dataflow descriptions by using a target independent,
                 high-level API, and a dataflow model that allows for
                 explicit spatial and temporal parallelism. READY
                 directly maps the dataflow kernels onto the
                 accelerator. Our tool is flexible and extensible and
                 provides the infrastructure to explore different
                 accelerator designs. We validate READY on the Intel
                 Harp platform, and our experimental results show an
                 average 2x execution runtime improvement when compared
                 to an 8-thread multi-core processor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Machine-learning-based optimization decision models (loop unrolling
%%% as the representative case) for dynamic binary translation (DBT).
%%% TECS 18(5s), article 57; ending page is the `??' placeholder.
@Article{Park:2019:MOE,
  author =       "Sunghyun Park and Youfeng Wu and Janghaeng Lee and
                 Amir Aupov and Scott Mahlke",
  title =        "Multi-objective Exploration for Practical Optimization
                 Decisions in Binary Translation",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "57:1--57:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358185",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358185",
  abstract =     "In the design of mobile systems, hardware/software
                 (HW/SW) co-design has important advantages by creating
                 specialized hardware for the performance or power
                 optimizations. Dynamic binary translation (DBT) is a
                 key component in co-design. During the translation, a
                 dynamic optimizer in the DBT system applies various
                 software optimizations to improve the quality of the
                 translated code. With dynamic optimization,
                 optimization time is an exposed run-time overhead and
                 useful analyses are often restricted due to their high
                 costs. Thus, a dynamic optimizer needs to make smart
                 decisions with limited analysis information, which
                 complicates the design of optimization decision models
                 and often causes failures in human-made heuristics. In
                 mobile systems, this problem is even more challenging
                 because of strict constraints on computing capabilities
                 and memory size. To overcome the challenge, we
                 investigate an opportunity to build practical
                 optimization decision models for DBT by using machine
                 learning techniques. As the first step, loop unrolling
                 is chosen as the representative optimization. We base
                 our approach on the industrial strength DBT
                 infrastructure and conduct evaluation with 17,116
                 unrollable loops collected from 200 benchmarks and
                 real-life programs across various domains. By utilizing
                 all available features that are potentially important
                 for loop unrolling decision, we identify the best
                 classification algorithm for our infrastructure with
                 consideration for both prediction accuracy and cost.
                 The greedy feature selection algorithm is then applied
                 to the classification algorithm to distinguish its
                 significant features and cut down the feature space. By
                 maintaining significant features only, the best
                 affordable classifier, which satisfies the budgets
                 allocated to the decision process, shows 74.5\% of
                 prediction accuracy for the optimal unroll factor and
                 realizes an average 20.9\% reduction in dynamic
                 instruction count during the steady-state translated
                 code execution. For comparison, the best baseline
                 heuristic achieves 46.0\% prediction accuracy with an
                 average 13.6\% instruction count reduction. Given that
                 the infrastructure is already highly optimized and the
                 ideal upper bound for instruction reduction is observed
                 at 23.8\%, we believe this result is noteworthy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% End-to-end timing analysis (maximum reaction time and maximum data
%%% age) for sporadic cause-effect chains in distributed systems.
%%% TECS 18(5s), article 58; ending page is the `??' placeholder.
%%% The particle `von der' is kept lowercase and unbraced so that BibTeX
%%% parses it as the von part of the name, with Br{\"u}ggen as surname.
@Article{Durr:2019:EET,
  author =       "Marco D{\"u}rr and Georg von der Br{\"u}ggen and
                 Kuan-Hsun Chen and Jian-Jia Chen",
  title =        "End-to-End Timing Analysis of Sporadic Cause-Effect
                 Chains in Distributed Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "58:1--58:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358181",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358181",
  abstract =     "A cause-effect chain is used to define the logical
                 order of data dependent tasks, which is independent
                 from the execution order of the jobs of the
                 (periodic/sporadic) tasks. Analyzing the worst-case
                 End-to-End timing behavior, associated to a
                 cause-effect chain, is an important problem in embedded
                 control systems. For example, the detailed timing
                 properties of modern automotive systems are specified
                 in the AUTOSAR Timing Extensions. In this paper, we
                 present a formal End-to-End timing analysis for
                 distributed systems. We consider the two most important
                 End-to-End timing semantics, i.e., the button-to-action
                 delay (termed as the maximum reaction time) and the
                 worst-case data freshness (termed as the maximum data
                 age). Our contribution is significant due to the
                 consideration of the sporadic behavior of job
                 activations, whilst the results in the literature have
                 been mostly limited to periodic activations. The proof
                 strategy shows the (previously unexplored) connection
                 between the reaction time (data age, respectively) and
                 immediate forward (backward, respectively) job chains.
                 Our analytical results dominate the state of the art
                 for sporadic task activations in distributed systems
                 and the evaluations show a clear improvement for
                 synthesized task systems as well as for a real world
                 automotive benchmark setting.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Approximate high-level synthesis (HLS) framework that accepts
%%% real-time constraints and uses WCET analysis to trade time for error.
%%% TECS 18(5s), article 59; ending page is the `??' placeholder.
@Article{Leipnitz:2019:HLS,
  author =       "Marcos T. Leipnitz and Gabriel L. Nazar",
  title =        "High-Level Synthesis of Approximate Designs under
                 Real-Time Constraints",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "59:1--59:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358182",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358182",
  abstract =     "The adoption of High-Level Synthesis (HLS) has
                 increased as the latest HLS tools have evolved to
                 provide high-quality results while improving
                 productivity and time-to-market. Concurrently, many
                 works have been proposing the incorporation of
                 approximate computing techniques within HLS toolchains,
                 allowing automated generation of inexact circuits for
                 error-tolerant application domains with the aim of
                 trading-off computation accuracy with area/power
                 savings or performance improvements. Thus, when
                 attempting to make a design meet timing requirements,
                 designers of real-time systems using HLS may resort to
                 approximation approaches. However, current approximate
                 HLS tools do not allow specifying real-time
                 constraints, being instead error-constrained to explore
                 area, power, or performance optimizations. In this
                 work, we propose an approximate HLS framework for
                 real-time systems that can be integrated with
                 state-of-the-art HLS tools. With this framework
                 designers can specify real-time constraints and satisfy
                 them while minimizing the output error. It uses
                 scheduling information and Worst-Case Execution Time
                 (WCET) analysis for iteratively exploring time-error
                 trade-offs of approximations in the time-critical
                 execution path. Experimental results on signal and
                 image processing benchmarks show that we can reduce the
                 WCET of exact designs by up to 35\% with acceptable
                 quality degradation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "59",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Protecting quantum-oracle intellectual property when a quantum
%%% circuit runs on a remote, untrusted quantum computer server.
%%% TECS 18(5s), article 60; ending page is the `??' placeholder.
@Article{Saeed:2019:LDB,
  author =       "Samah Mohamed Saeed and Robert Wille and Ramesh
                 Karri",
  title =        "Locking the Design of Building Blocks for Quantum
                 Circuits",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "60:1--60:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358184",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358184",
  abstract =     "The research community expects that quantum computers
                 will give economical results for particular problems on
                 which the classical computers break down. Examples
                 include factoring of large numbers, searching in a big
                 database, or simulating chemical reactions to design
                 new drugs. Attempts are ongoing to build up a practical
                 quantum computer. Users (clients) can implement quantum
                 circuits to run on these quantum computers. However,
                 before running the quantum circuit on the quantum
                 computer, the users (clients) should compile, optimize,
                 decompose, and technology map the quantum circuit. In
                 the current embodiment, the resulting quantum circuit
                 runs on a remote and untrusted quantum computer server
                 --- introducing security risks. This study explores the
                 risk of outsourcing the quantum circuit to the quantum
                 computer by focusing on quantum oracles. Quantum
                 oracles are pivotal building blocks and require
                 specialized expertise and means to design. Hence, the
                 designer may protect this proprietary quantum oracle
                 intellectual property (IP) and hide his/her private
                 information. We investigate how to manage that on a
                 quantum computer server using the IBM project QX
                 quantum computer and Qiskit tools as an exemplar.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "60",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% SWARAM: portable, offline, energy-efficient embedded system for the
%%% genomic variant calling workflow.  TECS 18(5s), article 61; ending
%%% page is the `??' placeholder.  The abstract ends with a bare GitHub
%%% URL followed by the sentence-final period.
@Article{Mohanty:2019:SPE,
  author =       "Ram Prasad Mohanty and Hasindu Gamaarachchi and Andrew
                 Lambert and Sri Parameswaran",
  title =        "{SWARAM}: Portable Energy and Cost Efficient Embedded
                 System for Genomic Processing",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "61:1--61:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358211",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358211",
  abstract =     "Treatment of patients using high-quality precision
                 medicine requires a thorough understanding of the
                 genetic composition of a patient. Ideally, the
                 identification of unique variations in an individual's
                 genome is needed for specifying the necessary
                 treatment. Variant calling workflow is a pipeline of
                 tools, integrating state of the art software systems
                 aimed at alignment, sorting and variant calling for the
                 whole genome sequencing (WGS) data. This pipeline is
                 utilized for identifying unique variations in an
                 individual's genome (compared to a reference genome).
                 Currently, such a workflow is implemented on
                 high-performance computers (with additional GPUs or
                 FPGAs) or in cloud computers. Such systems are large,
                 have a high cost, and rely on the internet for genome
                 data transfer which makes the system unusable in remote
                 locations unequipped with internet connectivity. It
                 further raises privacy concerns due to processing being
                 carried out in a different facility. To overcome such
                 limitations, in this paper, for the first time, we
                 present a cost-efficient, offline, scalable, portable,
                 and energy-efficient computing system named SWARAM for
                 variant calling workflow processing. The system uses
                 novel architecture and algorithms to match against
                 partial reference genomes to exploit smaller memory
                 sizes which are typically available in tiny processing
                 systems. Extensive tests on a standard benchmark
                 data-set (NA12878 Illumina platinum genome) confirm
                 that the time consumed for the data transfer and
                 completing variant calling workflow on SWARAM was
                 competitive to that of a 32-core Intel Xeon server with
                 similar accuracy, but costs less than a fifth, and
                 consumes less than 40\% of the energy of the server
                 system. The original scripts and code we developed for
                 executing the variant calling workflow on SWARAM are
                 available in the associated Github repository
                 https://github.com/Rammohanty/swaram.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "61",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% AuthCropper: encryption scheme with partial decryption combined with
%%% a signature scheme, for authenticated yet privacy-preserving video.
%%% TECS 18(5s), article 62; ending page is the `??' placeholder.  Also
%%% listed in cryptography2010.bib (see bibsource).
@Article{Kim:2019:AAI,
  author =       "Jihye Kim and Jiwon Lee and Hankyung Ko and Donghwan
                 Oh and Semin Han and Gwonho Jeong and Hyunok Oh",
  title =        "{AuthCropper}: Authenticated Image Cropper for Privacy
                 Preserving Surveillance Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "62:1--62:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358195",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358195",
  abstract =     "As surveillance systems are popular, the privacy of
                 the recorded video becomes more important. On the other
                 hand, the authenticity of video images should be
                 guaranteed when used as evidence in court. It is
                 challenging to satisfy both (personal) privacy and
                 authenticity of a video simultaneously, since the
                 privacy requires modifications (e.g., partial
                 deletions) of an original video image while the
                 authenticity does not allow any modifications of the
                 original image. This paper proposes a novel method to
                 convert an encryption scheme to support partial
                 decryption with a constant number of keys and construct
                 a privacy-aware authentication scheme by combining with
                 a signature scheme. The security of our proposed scheme
                 is implied by the security of the underlying encryption
                 and signature schemes. Experimental results show that
                 the proposed scheme can handle the UHD video stream
                 with more than 17 fps on a real embedded system, which
                 validates the practicality of the proposed scheme.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "62",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Optical probe (optode) design-space exploration for transabdominal
%%% fetal oximetry (TFO), framed as a multi-objective optimization.
%%% TECS 18(5s), article 63; ending page is the `??' placeholder.
@Article{Fong:2019:ODS,
  author =       "Daniel D. Fong and Vivek J. Srinivasan and Kourosh
                 Vali and Soheil Ghiasi",
  title =        "Optode Design Space Exploration for Clinically-robust
                 Non-invasive Fetal Oximetry",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "63:1--63:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358207",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358207",
  abstract =     "Non-invasive transabdominal fetal oximetry (TFO) has
                 the potential to improve delivery outcomes by providing
                 physicians with an objective metric of fetal well-being
                 during labor. Fundamentally, the technology is based on
                 sending light through the maternal abdomen to
                 investigate deep fetal tissue, followed by detection
                 and processing of the light that returns (via
                 scattering) to the outside of the maternal abdomen. The
                 placement of the photodetector in relation to the light
                 source critically impacts TFO system performance,
                 including its operational robustness in the face of
                 fetal depth variation. However, anatomical differences
                 between pregnant women cause the fetal depths to vary
                 drastically, which further complicates the optical
                 probe (optode) design optimization. In this paper, we
                 present a methodology to solve this problem. We frame
                 optode design space exploration as a multi-objective
                 optimization problem, where hardware complexity (cost)
                 and performance across a wider patient population
                 (robustness) form competing objectives. We propose a
                 model-based approach to characterize the Pareto-optimal
                 points in the optode design space, through which a
                 specific design is selected. Experimental evaluation
                 via simulation and in vivo measurement on pregnant
                 sheep support the efficacy of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "63",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% PredictNcool: proactive thermal management for 3D memories driven by
%%% a lightweight steady-state temperature predictor.  TECS 18(5s),
%%% article 64; ending page is the `??' placeholder.
@Article{Siddhu:2019:PLA,
  author =       "Lokesh Siddhu and Preeti Ranjan Panda",
  title =        "{PredictNcool}: Leakage Aware Thermal Management for
                 {$3$D} Memories Using a Lightweight Temperature
                 Predictor",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "64:1--64:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358208",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358208",
  abstract =     "Recent research on mitigating thermal problems in 3D
                 memories has covered reactive strategies that reduce
                 memory power consumption, and thereby, performance,
                 when the memory temperature reaches the maximum
                 operating limit. Such techniques could benefit from
                 temperature prediction and avoid unnecessary
                 invocations and state transitions of the thermal
                 management strategy. We develop an accurate steady
                 state temperature predictor for thermal management of
                 3D memories. We utilize the symmetries in the
                 floorplan, along with other design insights, to reduce
                 the predictor's model parameters, making it lightweight
                 and suitable for runtime thermal management. Using the
                 temperature prediction, we introduce PredictNcool, a
                 proactive thermal management strategy to reduce
                 application runtime and memory energy. We compare
                 PredictNcool with two recent thermal management
                 strategies and our experiments show that the proposed
                 optimization results in performance improvements of
                 28\% and 5\%, and memory subsystem energy reductions of
                 38\% and 12\% (on average).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% RMW-F: built-in NAND flash cache for SMR disks that holds the data
%%% which would otherwise incur read-modify-write operations (RMWs).
%%% TECS 18(5s), article 65; ending page is the `??' placeholder.
@Article{Ma:2019:RFD,
  author =       "Chenlin Ma and Zhaoyan Shen and Lei Han and Zili
                 Shao",
  title =        "{RMW-F}: a Design of {RMW-Free} Cache Using Built-in
                 {NAND-Flash} for {SMR} Storage",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "65:1--65:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358210",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358210",
  abstract =     "Shingled Magnetic Recording (SMR) disks have been
                 proposed as a high-density, non-volatile media and
                 precede traditional hard disk drives in both storing
                 capacity and cost. However, the intrinsic
                 characteristics of SMR disks raise a major performance
                 challenge named read-modify-write operations (RMWs)
                 that are time-consuming and can significantly degrade
                 the overall system performance. Current designs of SMR
                 disks usually adopt a persistent cache to alleviate the
                 negative effect brought by RMWs and the cache is used
                 as a first-level cache to buffer all the incoming
                 writes of the whole SMR storage system. In this paper,
                 we propose to change the functionality of the cache,
                 that is, the cache will no longer serve as a
                 first-level cache like previous. Incoming data are
                 distinguished according to their different write-back
                 behavior and those data which will incur RMWs will be
                 left in our built-in NAND flash cache called RMW-free
                 Cache (RMW-F) to eliminate the need of RMWs. Besides,
                 RMW-F improves the cleaning efficiency by a model that
                 takes both write-back cost and data popularity into
                 considerations. Our experimental results show that
                 RMW-F can achieve both system performance and cleaning
                 efficiency improvements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "65",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Sequential-write-constrained B+-tree index scheme for shingled
%%% magnetic recording (SMR) drives.  TECS 18(5s), article 66; ending
%%% page is the `??' placeholder.  In the abstract, `B$^+$-tree' is
%%% written without a space before the hyphen, matching the title.
@Article{Liang:2019:ESW,
  author =       "Yu-Pei Liang and Tseng-Yi Chen and Yuan-Hao Chang and
                 Shuo-Han Chen and Kam-Yiu Lam and Wei-Hsin Li and
                 Wei-Kuan Shih",
  title =        "Enabling Sequential-write-constrained {B+}-tree Index
                 Scheme to Upgrade Shingled Magnetic Recording Storage
                 Performance",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "66:1--66:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358201",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358201",
  abstract =     "When a shingle magnetic recording (SMR) drive has been
                 widely applied to modern computer systems (e.g.,
                 archive file systems, big data computing systems, and
                 large-scale database systems), storage system
                 developers should thoroughly review whether current
                 designs (e.g., index schemes and data placements) are
                 appropriate for an SMR drive because of its sequential
                 write constraint. Through many prior works excellently
                 manage data in an SMR drive by integrating their
                 proposed solutions into the driver layer, an index
                 scheme over an SMR drive has never been optimized by
                 any previous works because managing index over the SMR
                 drive needs to jointly consider the properties of
                 B$^+$-tree and SMR natures (e.g., sequential write
                 constraint and zone partitions) in a host storage
                 system. Moreover, poor index management will result in
                 terrible storage performance because an index manager
                 is extensively used in file systems and database
                 applications. For optimizing the B$^+$-tree index
                 structure over an SMR storage, this work identifies
                 performance overheads caused by the B$^+$-tree index
                 structure in an SMR drive. By such observation, this
                 study proposes a sequential-write-constrained
                 B$^+$-tree index scheme, namely SW-B$^+$ tree, which
                 consists of an address redirection data structure, an
                 SMR-aware node allocation mechanism, and a
                 frequency-aware garbage collection strategy. According
                 to our experiments, the SW-B$^+$ tree can improve the
                 SMR storage performance 55\% on average.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "66",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jiang:2019:ASL,
  author =       "Weiwen Jiang and Edwin H.-M. Sha and Xinyi Zhang and
                 Lei Yang and Qingfeng Zhuge and Yiyu Shi and Jingtong
                 Hu",
  title =        "Achieving Super-Linear Speedup across Multi-{FPGA} for
                 Real-Time {DNN} Inference",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "67:1--67:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358192",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358192",
  abstract =     "Real-time Deep Neural Network (DNN) inference with
                 low-latency requirement has become increasingly
                 important for numerous applications in both cloud
                 computing (e.g., Apple's Siri) and edge computing
                 (e.g., Google/Waymo's driverless car). FPGA-based DNN
                 accelerators have demonstrated both superior
                 flexibility and performance; in addition, for real-time
                 inference with low batch size, FPGA is expected to
                 achieve further performance improvement. However, the
                 performance gain from the single-FPGA design is
                 obstructed by the limited on-chip resource. In this
                 paper, we employ multiple FPGAs to cooperatively run
                 DNNs with the objective of achieving super-linear
                 speed-up against single-FPGA design. In implementing
                 such systems, we found two barriers that hinder us from
                 achieving the design goal: (1) the lack of a clear
                 partition scheme for each DNN layer to fully exploit
                 parallelism, and (2) the insufficient bandwidth between
                 the off-chip memory and the accelerator due to the
                 growing size of DNNs. To tackle these issues, we
                 propose a general framework, ``Super-LIP'', which can
                 support different kinds of DNNs. In this paper, we take
                 Convolutional Neural Network (CNN) as a vehicle to
                 illustrate Super-LIP. We first formulate an accurate
                 system-level model to support the exploration of best
                 partition schemes. Then, we develop a novel design
                 methodology to effectively alleviate the heavy loads on
                 memory bandwidth by moving traffic from memory bus to
                 inter-FPGA links. We implement Super-LIP based on
                 ZCU102 FPGA boards. Results demonstrate that Super-LIP
                 with 2 FPGAs can achieve $ 3.48 \times $ speedup,
                 compared to the state-of-the-art single-FPGA design.
                 What is more, as the number of FPGAs scales up, the
                 system latency can be further reduced while maintaining
                 high energy efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2019:ALA,
  author =       "Wei-Chen Wang and Yuan-Hao Chang and Tei-Wei Kuo and
                 Chien-Chung Ho and Yu-Ming Chang and Hung-Sheng Chang",
  title =        "Achieving Lossless Accuracy with Lossy Programming for
                 Efficient Neural-Network Training on {NVM}-Based
                 Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "68:1--68:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358191",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358191",
  abstract =     "Neural networks over conventional computing platforms
                 are heavily restricted by the data volume and
                 performance concerns. While non-volatile memory offers
                 potential solutions to data volume issues, challenges
                 must be faced over performance issues, especially with
                 asymmetric read and write performance. Beside that,
                 critical concerns over endurance must also be resolved
                 before non-volatile memory could be used in reality for
                 neural networks. This work addresses the performance
                 and endurance concerns altogether by proposing a
                 data-aware programming scheme. We propose to consider
                 neural network training jointly with respect to the
                 data-flow and data-content points of view. In
                 particular, methodologies with approximate results over
                 Dual-SET operations were presented. Encouraging results
                 were observed through a series of experiments, where
                 great efficiency and lifetime enhancement is seen
                 without sacrificing the result accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2019:DAS,
  author =       "Zhengguo Chen and Quan Deng and Nong Xiao and Kirk
                 Pruhs and Youtao Zhang",
  title =        "{DWMAcc}: Accelerating Shift-based {CNNs} with Domain
                 Wall Memories",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "69:1--69:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358199",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358199",
  abstract =     "PIM (processing-in-memory) based hardware accelerators
                 have shown great potentials in addressing the
                 computation and memory access intensity of modern CNNs
                 (convolutional neural networks). While adopting NVM
                 (non-volatile memory) helps to further mitigate the
                 storage and energy consumption overhead, adopting
                 quantization, e.g., shift-based quantization, helps to
                 tradeoff the computation overhead and the accuracy
                 loss, integrating both NVM and quantization in hardware
                 accelerators leads to sub-optimal acceleration. In this
                 paper, we exploit the natural shift property of DWM
                 (domain wall memory) to devise DWMAcc, a DWM-based
                 accelerator with asymmetrical storage of weight and
                 input data, to speed up the inference phase of
                 shift-based CNNs. DWMAcc supports flexible shift
                 operations to enable fast processing with low
                 performance and area overhead. We then optimize it with
                 zero-sharing, input-reuse, and weight-share schemes.
                 Our experimental results show that, on average, DWMAcc
                 achieves $ 16.6 \times $ performance improvement and $
                 85.6 \times $ energy consumption reduction over a
                 state-of-the-art SRAM based design.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dave:2019:DEP,
  author =       "Shail Dave and Youngbin Kim and Sasikanth Avancha and
                 Kyoungwoo Lee and Aviral Shrivastava",
  title =        "{dMazeRunner}: Executing Perfectly Nested Loops on
                 Dataflow Accelerators",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "70:1--70:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358198",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358198",
  abstract =     "Dataflow accelerators feature simplicity,
                 programmability, and energy-efficiency and are
                 visualized as a promising architecture for accelerating
                 perfectly nested loops that dominate several important
                 applications, including image and media processing and
                 deep learning. Although numerous accelerator designs
                 are being proposed, how to discover the most efficient
                 way to execute the perfectly nested loop of an
                 application onto computational and memory resources of
                 a given dataflow accelerator (execution method) remains
                 an essential and yet unsolved challenge. In this paper,
                 we propose dMazeRunner --- to efficiently and
                 accurately explore the vast space of the different ways
                 to spatiotemporally execute a perfectly nested loop on
                 dataflow accelerators (execution methods). The novelty
                 of dMazeRunner framework is in: (i) a holistic
                 representation of the loop nests, that can succinctly
                 capture the various execution methods, (ii) accurate
                 energy and performance models that explicitly capture
                 the computation and communication patterns, data
                 movement, and data buffering of the different execution
                 methods, and (iii) drastic pruning of the vast search
                 space by discarding invalid solutions and the solutions
                 that lead to the same cost. Our experiments on various
                 convolution layers (perfectly nested loops) of popular
                 deep learning applications demonstrate that the
                 solutions discovered by dMazeRunner are on average $
                 9.16 \times $ better in Energy-Delay-Product (EDP) and
                 $ 5.83 \times $ better in execution time, as compared
                 to prior approaches. With additional pruning
                 heuristics, dMazeRunner reduces the search time from
                 days to seconds with a mere 2.56\% increase in EDP, as
                 compared to the optimal solution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "70",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Spellini:2019:CDM,
  author =       "Stefano Spellini and Michele Lora and Franco Fummi and
                 Sudipta Chattopadhyay",
  title =        "Compositional Design of Multi-Robot Systems Control
                 Software on {ROS}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "71:1--71:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358197",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358197",
  abstract =     "This paper presents a methodology that relies on
                 Assume-Guarantee Contracts to decompose the problem of
                 synthesizing control software for a multi-robot system.
                 Initially, each contract describes either a component
                 (e.g., a robot) or an aspect of the system. Then, the
                 design problem is decomposed into different synthesis
                 and verification sub-problems, allowing to tackle the
                 complexity involved in the design process. The design
                 problem is then recomposed by exploiting the
                 rigorousness provided by contracts. This allows us to
                 achieve system-level simulation capable to be used for
                 validating the entire design. Once validated, the
                 software synthesized during the process can be
                 integrated into Robot Operating System (ROS) nodes and
                 executed using state-of-the-practice packages and tools
                 for modern robotic systems. We apply the methodology to
                 generate a control strategy for an autonomous goods
                 transportation system. Our results show a massive
                 reduction of the time required to obtain automatically
                 the control software implementing a multi-robot
                 mission.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "71",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mendis:2019:ADU,
  author =       "Hashan Roshantha Mendis and Pi-Cheng Hsiu",
  title =        "Accumulative Display Updating for Intermittent
                 Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "72:1--72:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358190",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358190",
  abstract =     "Electrophoretic displays are ideal for self-powered
                 systems, but currently require an uninterrupted power
                 supply to carry out the full display update cycle.
                 Although sensible for battery-powered devices, when
                 directly applied to intermittently-powered systems,
                 guaranteeing display update atomicity usually results
                 in repeated execution until completion or can incur
                 high hardware/software overheads, heavy programmer
                 intervention and large energy buffering requirements to
                 provide sufficient display update energy. This paper
                 introduces the concept, design and implementation of
                 accumulative display updating, which relaxes the
                 atomicity constraints of display updating, such that
                 the display update process can be accumulatively
                 completed across power cycles, without the need for
                 sufficient energy for the entire display update. To
                 allow for process logical continuity, we track the
                 update progress during execution and facilitate a safe
                 display shutdown procedure to overcome physical and
                 operability issues related to abrupt power failure.
                 Additionally, a context-aware updating policy is
                 proposed to handle data freshness issues, where the
                 delay in addressing new update requests can cause the
                 display contents to be in conflict with new data
                 available. Experimental results on a Texas Instruments
                 device with an integrated electrophoretic display show
                 that, compared to atomic display updating, our design
                 can significantly increase accurate forward progress,
                 decrease the average response time of display updating
                 and reduce time and energy wastage when displaying
                 fresh data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "72",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seyoum:2019:FFO,
  author =       "Biruk B. Seyoum and Alessandro Biondi and Giorgio C.
                 Buttazzo",
  title =        "{FLORA}: {FLoorplan} Optimizer for Reconfigurable
                 Areas in {FPGAs}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "73:1--73:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358202",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358202",
  abstract =     "Floorplanning is a mandatory step in the design of
                 hardware accelerators for FPGA platforms, especially
                 when adopting dynamic partial reconfiguration (DPR).
                 This paper presents FLORA, an automated floorplanner
                 based on optimization via Mixed-Integer Linear
                 Programming (MILP). The floorplanning problem is solved
                 by means of a novel fine-grained modeling strategy of
                 FPGA resources. Furthermore, differently from other
                 proposals, our approach takes into account several
                 realistic Partial Reconfiguration (PR) floorplanning
                 constraints on FPGAs. FLORA was compared against
                 state-of-the-art floorplanners by means of benchmark
                 suites, showing that it is capable of providing better
                 performance in terms of resource consumption, maximum
                 inter-region, wire-length, and running time required to
                 produce the solutions. Finally, FLORA was utilized to
                 generate placements for a partially-reconfigurable
                 video processing engine that was implemented on a
                 Xilinx Zynq-7020.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "73",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Moazzemi:2019:HFL,
  author =       "Kasra Moazzemi and Biswadip Maity and Saehanseul Yi
                 and Amir M. Rahmani and Nikil Dutt",
  title =        "{HESSLE-FREE}: Heterogeneous Systems Leveraging Fuzzy
                 Control for Runtime Resource Management",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "74:1--74:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358203",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358203",
  abstract =     "As computing platforms increasingly embrace
                 heterogeneity, runtime resource managers need to
                 efficiently, dynamically, and robustly manage shared
                 resources (e.g., cores, power budgets, memory
                 bandwidth). To address the complexities in
                 heterogeneous systems, state-of-the-art techniques that
                 use heuristics or machine learning have been proposed.
                 On the other hand, conventional control theory can be
                 used for formal guarantees, but may face unmanageable
                 complexity for modeling system dynamics of complex
                 heterogeneous systems. We address this challenge
                 through HESSLE-FREE (Heterogeneous Systems Leveraging
                 Fuzzy Control for Runtime Resource Management): an
                 approach leveraging fuzzy control theory that combines
                 the strengths of classical control theory together with
                 heuristics to form a light-weight, agile, and efficient
                 runtime resource manager for heterogeneous systems. We
                 demonstrate the efficacy of HESSLE-FREE executing on a
                 NVIDIA Jetson TX2 platform (containing a heterogeneous
                 multi-processor with a GPU) to show that HESSLE-FREE:
                 (1) provides opportunity for optimization in the
                 controller and stability analysis to enhance the
                 confidence in the reliability of the system; (2)
                 coordinates heterogeneous compute units to achieve
                 desired objectives (e.g., QoS, optimal power
                 references, FPS) efficiently and with lower complexity,
                 and (3) eases the burden of system specification.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "74",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vashist:2019:UTS,
  author =       "Abhishek Vashist and Andrew Keats and Sai Manoj
                 Pudukotai Dinakarrao and Amlan Ganguly",
  title =        "Unified Testing and Security Framework for Wireless
                 Network-on-Chip Enabled Multi-Core Chips",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "75:1--75:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358212",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358212",
  abstract =     "On-chip wireless interconnects have been demonstrated
                 to improve the performance and energy consumption of
                 data communication in Network-on-Chips (NoCs). However,
                 the wireless interfaces (WIs) can be defective,
                 rendering these broken links severely affect the
                 performance. This makes manufacturing test of the WIs
                 critical. While analog testing of the transceivers is
                 possible, such methodologies are impractical in a
                 Wireless NoC (WiNoC) due to large overheads. In
                 addition to testing, security is another prominent
                 challenge in WiNoCs, as the security breach can happen
                 due to embedded hardware Trojans or through external
                 attacker exploiting the wireless medium. The typical
                 security measures used in general wireless networks are
                 not practical in a WiNoC due to unique network
                 architectures and performance requirements of such a
                 system. However, both testing and security defense can
                 potentially leverage a basic monitoring framework,
                 which can detect malfunctions or anomalies. Based on
                 this idea, we propose a unified architecture for
                 testing and attack detection and protection of on-chip
                 wireless interconnects. We adopt a Built-In-Self Test
                 (BIST) methodology to enable online monitoring of the
                 wireless interconnects which can also be reused for
                 monitoring the security threats. We focus on
                 manufacturing defects of the WIs for testing and
                 persistent jamming attack for the security measures, as
                 this kind of attack is most likely on wireless
                 communication systems. The BIST methodology is capable
                 of detecting faults in the wireless links with a low
                 aliasing probability of $ 2.32 \times 10^{-10} $.
                 Additionally, the proposed unified architecture is able
                 to detect the persistent jamming with an accuracy of
                 99.87\% and suffer $<$ 3\% communication bandwidth
                 degradation even in the presence of attacks from either
                 internal or external sources.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "75",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dugo:2019:CLC,
  author =       "Alexy Torres Aurora Dugo and Jean-Baptiste Lefoul and
                 Felipe Gohring {De Magalhaes} and Dahman Assal and
                 Gabriela Nicolescu",
  title =        "Cache Locking Content Selection Algorithms for
                 {ARINC-653} Compliant {RTOS}",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "76:1--76:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358196",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358196",
  abstract =     "Avionic software is the subject of stringent real
                 time, determinism and safety constraints. Software
                 designers face several challenges, one of them being
                 the interferences that appear in common situations,
                 such as resource sharing. The interferences introduce
                 non-determinism and delays in execution time. One of
                 the main interference prone resources are cache
                 memories. In single-core processors, caches comprise
                 multiple private levels. This breaks the isolation
                 principle imposed by avionic standards, such as the
                 ARINC-653. This standard defines partitioned
                 architectures where one partition should never directly
                 interfere with another one. In cache-based
                 architectures, one partition can modify the cache
                 content of another partition. In this paper, we propose
                 a method based on cache locking to reduce the
                 non-determinism and the contention on lower level
                 memories while improving the time performances.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "76",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Deshwal:2019:MMO,
  author =       "Aryan Deshwal and Nitthilan Kanappan Jayakodi and
                 Biresh Kumar Joardar and Janardhan Rao Doppa and Partha
                 Pratim Pande",
  title =        "{MOOS}: a Multi-Objective Design Space Exploration and
                 Optimization Framework for {NoC} Enabled Manycore
                 Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "77:1--77:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358206",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358206",
  abstract =     "The growing needs of emerging applications has posed
                 significant challenges for the design of optimized
                 manycore systems. Network-on-Chip (NoC) enables the
                 integration of a large number of processing elements
                 (PEs) in a single die. To design optimized manycore
                 systems, we need to establish suitable trade-offs among
                 multiple objectives including power, performance, and
                 thermal. Therefore, we consider multi-objective design
                 space exploration (MO-DSE) problems arising in the
                 design of NoC-enabled manycore systems: placement of
                 PEs and communication links to optimize two or more
                 objectives (e.g., latency, energy, and throughput).
                 Existing algorithms to solve MO-DSE problems suffer
                 from scalability and accuracy challenges as size of the
                 design space and the number of objectives grow. In this
                 paper, we propose a novel framework referred as
                 Multi-Objective Optimistic Search (MOOS) that performs
                 adaptive design space exploration using a data-driven
                 model to improve the speed and accuracy of
                 multi-objective design optimization process. We apply
                 MOOS to design both 3D heterogeneous and homogeneous
                 manycore systems using Rodinia, PARSEC, and SPLASH2
                 benchmark suites. We demonstrate that MOOS improves the
                 speed of finding solutions compared to state-of-the-art
                 methods by up to 13X while uncovering designs that are
                 up to 20\% better in terms of NoC. The optimized 3D
                 manycore systems improve the EDP up to 38\% when
                 compared to 3D mesh-based designs optimized for the
                 placement of PEs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "77",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Smirnov:2019:IGM,
  author =       "Fedor Smirnov and Behnaz Pourmohseni and Michael
                 Gla{\ss} and J{\"u}rgen Teich",
  title =        "{IGOR}, Get Me the Optimum! {Prioritizing} Important
                 Design Decisions During the {DSE} of Embedded Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "78:1--78:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358204",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358204",
  abstract =     "Design Space Exploration (DSE) techniques for complex
                 embedded systems must cope with a huge variety of
                 applications and target architectures as well as a wide
                 spectrum of objectives and constraints. In particular,
                 existing design automation approaches are either
                 problem-independent, in that they do not exploit any
                 knowledge about the optimization problem at hand, or
                 are tailored to specific a priori assumptions about the
                 problem and/or a specific set of design objectives.
                 While the latter are only applicable within a very
                 limited scope of design problems, the former may
                 struggle to deliver high-quality solutions for problems
                 with large design spaces and/or complex design
                 objectives. As a remedy, we propose Importance-Guided
                 Order Rearrangement (IGOR) as a novel approach for DSE
                 of embedded systems. Instead of relying on an a priori
                 problem knowledge, IGOR uses a
                 machine-learning-inspired technique to dynamically
                 analyze the importance of design decisions, i.e., the
                 impact that these decisions---within the specific problem
                 that is being optimized---have on the quality of explored
                 problem solutions w.r.t. the given design objectives.
                 Throughout the DSE, IGOR uses this information to guide
                 the optimization towards the most promising regions of
                 the design space. Experimental results for a variety of
                 applications from different domains of embedded
                 computing and for different optimization scenarios give
                 evidence that the proposed approach is both scalable
                 and adaptable, as it can be used for the optimization
                 of systems described by several thousands constraints,
                 where it outperforms both problem-specific and
                 problem-independent optimization approaches and
                 achieves $ \epsilon $-dominance improvements of up to
                 95\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cheng:2019:AVE,
  author          = {Zhongqi Cheng and Rainer D{\"o}mer},
  title           = {Analyzing Variable Entanglement for Parallel
                     Simulation of {SystemC TLM-2.0} Models},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {79},
  pages           = {79:1--79:??},
  month           = oct,
  year            = {2019},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  doi             = {https://doi.org/10.1145/3358194},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3358194},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {The SystemC TLM-2.0 standard is widely used in modern
                     electronic system level design for better
                     interoperability and higher simulation speed. However,
                     TLM-2.0 has been identified as an obstacle for parallel
                     SystemC simulation due to the disappearance of
                     channels. Without a containment construct, simulation
                     threads are permitted to directly access data of other
                     modules and that makes it difficult to synchronize such
                     accesses as required by the SystemC execution
                     semantics. In this paper, we propose a compile time
                     approach to statically analyze potential conflicts
                     among threads in SystemC TLM-2.0 loosely- and
                     approximately-timed models. We introduce a new Socket
                     Call Path technique which provides the compiler with
                     socket binding information for precise static analysis.
                     We also propose an algorithm to analyze entangled
                     variable pairs. Experimental results show that our
                     approach is able to support automatically safe parallel
                     simulation of SystemC models with TLM-2.0 Blocking
                     Transport Interface, Direct Memory Interface and
                     Non-blocking Transport Interface, resulting in
                     impressive simulation speeds.},
}

@Article{Seo:2019:ETM,
  author          = {Minjun Seo and Fadi Kurdahi},
  title           = {Efficient Tracing Methodology Using Automata
                     Processor},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {80},
  pages           = {80:1--80:??},
  month           = oct,
  year            = {2019},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  doi             = {https://doi.org/10.1145/3358200},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3358200},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Tracing or trace interface has been used in various
                     ways to find system defects or bugs. As embedded
                     systems are increasingly used in safety-critical
                     applications, tracing can provide useful information
                     during system execution at runtime. Non-intrusive
                     tracing that does not affect system performance has
                     become especially important, but unfortunately, the
                     biggest obstacle to this approach was the vast amount
                     of real-time trace data, making it challenging to
                     address complex requirements with relatively limited
                     hardware implementations. Automata processors can be
                     programmed with a memory-like structure of automata and
                     have a structure specific to streaming data, large
                     capacity, and parallel processing functions. This paper
                     promotes the idea of high-level system-on-chip
                     monitoring using automata processors. We used a
                     safety-critical pacemaker application in the
                     experiments, described timed automata (TA)-based
                     requirements, and tested intentionally injected 4,000
                     random failures. The TA model converted for Automata
                     Processor to monitor system, correctness, and safety
                     properties achieved 100\% failure detection rate in the
                     experiment, and the detected failure is reported as
                     fast enough to allow enough extent for failure
                     recovery.},
}

@Article{Brais:2019:AAM,
  author          = {Hadi Brais and Preeti Ranjan Panda},
  title           = {{Alleria}: an Advanced Memory Access Profiling
                     Framework},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {81},
  pages           = {81:1--81:??},
  month           = oct,
  year            = {2019},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  doi             = {https://doi.org/10.1145/3358193},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3358193},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Application analysis and simulation tools are used
                     extensively by embedded system designers to improve
                     existing optimization techniques or develop new ones.
                     We propose the Alleria framework to make it easier for
                     designers to comprehensively collect critical
                     information such as virtual and physical memory
                     addresses, accessed values, and thread schedules about
                     one or more target applications. Such profilers often
                     incur substantial performance overheads that are orders
                     of magnitude larger than native execution time. We
                     discuss how that overhead can be significantly reduced
                     using a novel profiling mechanism called adaptive
                     profiling. We develop a heuristic-based adaptive
                     profiling mechanism and evaluate its performance using
                     single-threaded and multi-threaded applications. The
                     proposed technique can improve profiling throughput by
                     up to 145\% and by 37\% on an average, enabling Alleria
                     to be used to comprehensively profile applications with
                     a throughput of over 3 million instructions per
                     second.},
}

@Article{Bhardwaj:2019:MCA,
  author          = {Kartikeya Bhardwaj and Ching-Yi Lin and Anderson
                     Sartor and Radu Marculescu},
  title           = {Memory- and Communication-Aware Model Compression for
                     Distributed Deep Learning Inference on {IoT}},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {82},
  pages           = {82:1--82:??},
  month           = oct,
  year            = {2019},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  doi             = {https://doi.org/10.1145/3358205},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3358205},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Model compression has emerged as an important area of
                     research for deploying deep learning models on
                     Internet-of-Things (IoT). However, for extremely
                     memory-constrained scenarios, even the compressed
                     models cannot fit within the memory of a single device
                     and, as a result, must be distributed across multiple
                     devices. This leads to a distributed inference paradigm
                     in which memory and communication costs represent a
                     major bottleneck. Yet, existing model compression
                     techniques are not communication-aware. Therefore, we
                     propose Network of Neural Networks (NoNN), a new
                     distributed IoT learning paradigm that compresses a
                     large pretrained `teacher' deep network into several
                     disjoint and highly-compressed `student' modules,
                     without loss of accuracy. Moreover, we propose a
                     network science-based knowledge partitioning algorithm
                     for the teacher model, and then train individual
                     students on the resulting disjoint partitions.
                     Extensive experimentation on five image classification
                     datasets, for user-defined memory/performance budgets,
                     show that NoNN achieves higher accuracy than several
                     baselines and similar accuracy as the teacher model,
                     while using minimal communication among students.
                     Finally, as a case study, we deploy the proposed model
                     for CIFAR-10 dataset on edge devices and demonstrate
                     significant improvements in memory footprint (up to $
                     24 \times $), performance (up to $ 12 \times $), and
                     energy per node (up to $ 14 \times $) compared to the
                     large teacher model. We further show that for
                     distributed inference on multiple edge devices, our
                     proposed NoNN model results in up to $ 33 \times $
                     reduction in total latency w.r.t. a state-of-the-art
                     model compression baseline.},
}

@Article{Barijough:2019:QLA,
  author          = {Kamyar Mirzazad Barijough and Zhuoran Zhao and Andreas
                     Gerstlauer},
  title           = {Quality\slash Latency-Aware Real-time Scheduling of
                     Distributed Streaming {IoT} Applications},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {83},
  pages           = {83:1--83:??},
  month           = oct,
  year            = {2019},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  doi             = {https://doi.org/10.1145/3358209},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3358209},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Embedded systems are increasingly networked and
                     distributed, often, such as in the Internet of Things
                     (IoT), over open networks with potentially unbounded
                     delays. A key challenge is the need for real-time
                     guarantees over such inherently unreliable and
                     unpredictable networks. Generally, timeouts are used to
                     provide timing guarantees while trading off data losses
                     and quality. The schedule of distributed task
                     executions and network timeouts thereby determines a
                     fundamental latency-quality trade-off that is, however,
                     not taken into account by existing scheduling
                     algorithms. In this paper, we propose an approach for
                     scheduling of distributed, real-time streaming
                     applications under quality-latency goals. We formulate
                     this as a problem of analytically deriving a static
                     worst-case schedule of a given distributed dataflow
                     graph that minimizes quality loss while meeting
                     guaranteed latency constraints. Towards this end, we
                     first develop a quality model that estimates SNR of
                     distributed streaming applications under given network
                     characteristics and an overall linearity assumption.
                     Using this quality model, we then formulate and solve
                     the scheduling of distributed dataflow graphs as a
                     numerical optimization problem. Simulation results with
                     random graphs show that quality/latency-aware
                     scheduling improves SNR over a baseline schedule by
                     50\% on average. When applied to a distributed neural
                     network application for handwritten digit recognition,
                     our scheduling methodology can improve classification
                     accuracy by 10\% over a naive distribution under tight
                     latency constraints.},
}

@Article{Wang:2019:DES,
  author          = {Youchao Wang and Sam Willis and Vasileios Tsoutsouras
                     and Phillip Stanley-Marbell},
  title           = {Deriving Equations from Sensor Data Using Dimensional
                     Function Synthesis},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {84},
  pages           = {84:1--84:??},
  month           = oct,
  year            = {2019},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  doi             = {https://doi.org/10.1145/3358218},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3358218},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {We present a new method for deriving functions that
                     model the relationship between multiple signals in a
                     physical system. The method, which we call dimensional
                     function synthesis, applies to data streams where the
                     dimensions of the signals are known. The method
                     comprises two phases: a compile-time synthesis phase
                     and a subsequent calibration using sensor data. We
                     implement dimensional function synthesis and use the
                     implementation to demonstrate efficiently summarizing
                     multi-modal sensor data for two physical systems using
                     90 laboratory experiments and 10,000 synthetic
                     idealized measurements. We evaluate the performance of
                     the compile-time phase of dimensional function
                     synthesis as well as the calibration phase overhead,
                     inference latency, and accuracy of the models our
                     method generates. The results show that our technique
                     can generate models in less than 300 ms on average
                     across all the physical systems we evaluated. When
                     calibrated with sensor data, our models outperform
                     traditional regression and neural network models in
                     inference accuracy in all the cases we evaluated. In
                     addition, our models perform better in training latency
                     (over $ 8660 \times $ improvement) and required
                     arithmetic operations in inference (over $ 34 \times $
                     improvement). These significant gains are largely the
                     result of exploiting information on the physics of
                     signals that has hitherto been ignored.},
}

@Article{Dai:2019:DMS,
  author          = {Xiaotian Dai and Wanli Chang and Shuai Zhao and Alan
                     Burns},
  title           = {A Dual-Mode Strategy for Performance-Maximisation and
                     Resource-Efficient {CPS} Design},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {85},
  pages           = {85:1--85:??},
  month           = oct,
  year            = {2019},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  doi             = {https://doi.org/10.1145/3358213},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3358213},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {The emerging scenarios of cyber-physical systems
                     (CPS), such as autonomous vehicles, require
                     implementing complex functionality with limited
                     resources, as well as high performances. This paper
                     considers a common setup in which multiple control and
                     non-control tasks share one processor, and proposes a
                     dual-mode strategy. The control task switches between
                     two sampling periods when rejecting (coping with) a
                     disturbance. We create an optimisation framework
                     looking for the switching sampling periods and time
                     instants that maximise the control performance (indexed
                     by settling time) and resource efficiency (indexed by
                     the number of tasks that are schedulable on the
                     processor). The latter objective is enabled with
                     schedulability analysis tailored for the dual-mode
                     model. Experimental results show that (i) given a set
                     of tasks, the proposed strategy improves the control
                     performances whilst retaining schedulability; and (ii)
                     given requirements on the control performances, the
                     proposed strategy is able to schedule more tasks.},
}

@Article{Passerone:2019:CEC,
  author =       "Roberto Passerone and {\'I}ncer Romeo, {\'I}{\~n}igo
                 and Alberto L. Sangiovanni-Vincentelli",
  title =        "Coherent Extension, Composition, and Merging Operators
                 in Contract Models for System Design",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "86:1--86:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358216",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358216",
  abstract =     "Contract models have been proposed to promote and
                 facilitate reuse and distributed development. In this
                 paper, we cast contract models into a coherent
                 formalism used to derive general results about the
                 properties of their operators. We study several
                 extensions of the basic model, including the
                 distinction between weak and strong assumptions and
                 maximality of the specification. We then analyze the
                 disjunction and conjunction operators, and show how
                 they can be broken up into a sequence of simpler
                 operations. This leads to the definition of a new
                 contract viewpoint merging operator, which better
                 captures the design intent in contrast to the more
                 traditional conjunction. The adjoint operation, which
                 we call separation, can be used to re-partition the
                 specification into different viewpoints. We show the
                 symmetries of these operations with respect to
                 composition and quotient.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "86",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bataineh:2019:EDL,
  author =       "Omar Bataineh and David S. Rosenblum and Mark
                 Reynolds",
  title =        "Efficient Decentralized {LTL} Monitoring Framework
                 Using Tableau Technique",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "87:1--87:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358219",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358219",
  abstract =     "This paper presents a novel framework for
                 decentralized monitoring of Linear Temporal Logic (LTL)
                 formulas, under the situation where processes are
                 synchronous and the formula is represented as a
                 tableau. The tableau technique allows one to construct
                 a semantic tree for the input LTL formula, which can be
                 used to optimize the decentralized monitoring of LTL in
                 various ways. Given a system $ P $ and an LTL formula $
                 \varphi $, we construct a tableau $ T_\varphi $. The
                 tableau $ T_\varphi $ is used for two purposes: (a) to
                 synthesize an efficient round-robin communication
                 policy for processes, and (b) to find the minimal ways
                 to decompose the formula and communicate observations
                 of processes in an efficient way. In our framework,
                 processes can propagate truth values of both atomic and
                 compound formulas (non-atomic formulas) depending on
                 the syntactic structure of the input LTL formula and
                 the observation power of processes. We demonstrate that
                 this approach of decentralized monitoring based on
                 tableau construction is more straightforward, more
                 flexible, and more likely to yield efficient solutions
                 than alternative approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "87",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Baumeister:2019:FSM,
  author =       "Jan Baumeister and Bernd Finkbeiner and Maximilian
                 Schwenger and Hazem Torfah",
  title =        "{FPGA} Stream-Monitoring of Real-time Properties",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "88:1--88:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358220",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358220",
  abstract =     "An essential part of cyber-physical systems is the
                 online evaluation of real-time data streams. Especially
                 in systems that are intrinsically safety-critical, a
                 dedicated monitoring component inspecting data streams
                 to detect problems at runtime greatly increases the
                 confidence in a safe execution. Such a monitor needs to
                 be based on a specification language capable of
                 expressing complex, high-level properties using only
                 the accessible low-level signals. Moreover, tight
                 constraints on computational resources exacerbate the
                 requirements on the monitor. Thus, several existing
                 approaches to monitoring are not applicable due to
                 their dependence on an operating system. We present an
                 FPGA-based monitoring approach by compiling an RTLola
                 specification into synthesizable VHDL code. RTLola is a
                 stream-based specification language capable of
                 expressing complex real-time properties while providing
                 an upper bound on the execution time and memory
                 requirements. The statically determined memory bound
                 allows for a compilation to an FPGA with a fixed size.
                 An advantage of FPGAs is a simple integration process
                 in existing systems and superb executing time. The
                 compilation results in a highly parallel implementation
                 thanks to the modular nature of RTLola specifications.
                 This further increases the maximal event rate the
                 monitor can handle.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "88",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bajczi:2019:WMP,
  author          = {Levente Bajczi and Andr{\'a}s V{\"o}r{\"o}s and Vince
                     Moln{\'a}r},
  title           = {Will My Program Break on This Faulty Processor?:
                     {Formal} Analysis of Hardware Fault Activations in
                     Concurrent Embedded Software},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  volume          = {18},
  number          = {5s},
  articleno       = {89},
  pages           = {89:1--89:??},
  month           = oct,
  year            = {2019},
  coden           = {????},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  doi             = {https://doi.org/10.1145/3358238},
  url             = {https://dl.acm.org/ft_gateway.cfm?id=3358238},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract        = {Formal verification is approaching a point where it
                     will be reliably applicable to embedded software. Even
                     though formal verification can efficiently analyze
                     multi-threaded applications, multi-core processors are
                     often considered too dangerous to use in critical
                     systems, despite the many benefits they can offer. One
                     reason is the advanced memory consistency model of such
                     CPUs. Nowadays, most software verifiers assume strict
                     sequential consistency, which is also the na{\"\i}ve
                     view of programmers. Modern multi-core processors,
                     however, rarely guarantee this assumption by default.
                     In addition, complex processor architectures may easily
                     contain design faults. Thanks to the recent advances in
                     hardware verification, these faults are increasingly
                     visible and can be detected even in existing
                     processors, giving an opportunity to compensate for the
                     problem in software. In this paper, we propose a
                     generic approach to consider inconsistent behavior of
                     the hardware in the analysis of software. Our approach
                     is based on formal methods and can be used to detect
                     the activation of existing hardware faults on the
                     application level and facilitate their mitigation in
                     software. The approach relies heavily on recent results
                     of model checking and hardware verification and offers
                     new, integrative research directions. We propose a
                     partial solution based on existing model checking tools
                     to demonstrate feasibility and evaluate their
                     performance in this context.},
}

@Article{Lee:2019:TAS,
  author =       "Youngmoon Lee and Kang G. Shin and Hoon Sung Chwa",
  title =        "Thermal-Aware Scheduling for Integrated {CPUs--GPU}
                 Platforms",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "90:1--90:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358235",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358235",
  abstract =     "As modern embedded systems like cars need high-power
                 integrated CPUs--GPU SoCs for various real-time
                 applications such as lane or pedestrian detection, they
                 face greater thermal problems than before, which may,
                 in turn, incur higher failure rate and cooling cost. We
                 demonstrate, via experimentation on a representative
                 CPUs--GPU platform, the importance of accounting for
                 two distinct thermal characteristics --- the platform's
                 temperature imbalance and different power dissipations
                 of different tasks --- in real-time scheduling to avoid
                 any burst of power dissipations while guaranteeing all
                 timing constraints. To achieve this goal, we propose a
                 new Real-Time Thermal-Aware Scheduling (RT-TAS)
                 framework. We first capture different CPU cores'
                 temperatures caused by different GPU power dissipations
                 (i.e., CPUs--GPU thermal coupling) with core-specific
                 thermal coupling coefficients. We then develop
                 thermally-balanced task-to-core assignment and
                 CPUs--GPU co-scheduling. The former addresses the
                 platform's temperature imbalance by efficiently
                 distributing the thermal load across cores while
                 preserving scheduling feasibility. Building on the
                 thermally-balanced task assignment, the latter
                 cooperatively schedules CPU and GPU computations to
                 avoid simultaneous peak power dissipations on both CPUs
                 and GPU, thus mitigating excessive temperature rises
                 while meeting task deadlines. We have implemented and
                 evaluated RT-TAS on an automotive embedded platform to
                 demonstrate its effectiveness in reducing the maximum
                 temperature by 6--12.2${}^\circ$C over existing
                 approaches without violating any task deadline.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "90",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2019:TAF,
  author =       "Peng Chen and Weichen Liu and Xu Jiang and Qingqiang
                 He and Nan Guan",
  title =        "Timing-Anomaly Free Dynamic Scheduling of Conditional
                 {DAG} Tasks on Multi-Core Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "91:1--91:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358236",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358236",
  abstract =     "In this paper, we propose a novel approach to schedule
                 conditional DAG parallel tasks, with which we can
                 derive safe response time upper bounds significantly
                 better than the state-of-the-art counterparts. The main
                 idea is to eliminate the notorious timing anomaly in
                 scheduling parallel tasks by enforcing certain order
                 constraints among the vertices, and thus the response
                 time bound can be accurately predicted off-line by
                 somehow ``simulating'' the runtime scheduling. A key
                 challenge to apply the timing-anomaly free scheduling
                 approach to conditional DAG parallel tasks is that at
                 runtime it may generate exponentially many instances
                 from a conditional DAG structure. To deal with this
                 problem, we develop effective abstractions, based on
                 which a safe response time upper bound is computed in
                 polynomial time. We also develop algorithms to explore
                 the vertex orders to shorten the response time bound.
                 The effectiveness of the proposed approach is evaluated
                 by experiments with randomly generated DAG tasks with
                 different parameter configurations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "91",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2019:SVH,
  author =       "Yu Wang and Mojtaba Zarei and Borzoo Bonakdarpour and
                 Miroslav Pajic",
  title =        "Statistical Verification of Hyperproperties for
                 Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "92:1--92:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358232",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358232",
  abstract =     "Many important properties of cyber-physical systems
                 (CPS) are defined upon the relationship between
                 multiple executions simultaneously in continuous time.
                 Examples include probabilistic fairness and sensitivity
                 to modeling errors (i.e., parameters changes) for
                 real-valued signals. These requirements can only be
                 specified by hyperproperties. In this article, we focus
                 on verifying probabilistic hyperproperties for CPS. To
                 cover a wide range of modeling formalisms, we first
                 propose a general model of probabilistic uncertain
                 systems (PUSs) that unify commonly studied CPS models
                 such as continuous-time Markov chains (CTMCs) and
                 probabilistically parametrized Hybrid I/O Automata
                 (P$^2$HIOA). To formally specify hyperproperties, we
                 propose a new temporal logic, hyper probabilistic
                 signal temporal logic (HyperPSTL) that serves as a
                 hyper and probabilistic version of the conventional
                 signal temporal logic (STL). Considering the complexity
                 of real-world systems that can be captured as PUSs, we
                 adopt a statistical model checking (SMC) approach for
                 their verification. We develop a new SMC technique
                 based on the direct computation of significance levels
                 of statistical assertions for HyperPSTL specifications,
                 which requires no a priori knowledge on the
                 indifference margin. Then, we introduce SMC algorithms
                 for HyperPSTL specifications on the joint probabilistic
                 distribution of multiple paths, as well as
                 specifications with nested probabilistic operators
                 quantifying different paths, which cannot be handled by
                 existing SMC algorithms. Finally, we show the
                 effectiveness of our SMC algorithms on CPS benchmarks
                 with varying levels of complexity, including the Toyota
                 Powertrain Control System.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "92",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Luo:2019:PFC,
  author =       "Zhengxiong Luo and Feilong Zuo and Yu Jiang and Jian
                 Gao and Xun Jiao and Jiaguang Sun",
  title =        "{Polar}: Function Code Aware Fuzz Testing of {ICS}
                 Protocol",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "93:1--93:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358227",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358227",
  abstract =     "Industrial Control System (ICS) protocols are widely
                 used to build communications among system components.
                 Compared with common internet protocols, ICS protocols
                 have more control over remote devices by carrying a
                 specific field called ``function code'', which assigns
                 what the receive end should do. Therefore, it is of
                 vital importance to ensure their correctness. However,
                 traditional vulnerability detection techniques such as
                 fuzz testing are challenged by the increasing
                 complexity of these diverse ICS protocols. In this
                 paper, we present a function code aware fuzzing
                 framework --- Polar, which automatically extracts
                 semantic information from the ICS protocol and utilizes
                 this information to accelerate security vulnerability
                 detection. Based on static analysis and dynamic taint
                 analysis, Polar initiates the values of the function
                 code field and identifies some vulnerable operations.
                 Then, novel semantic aware mutation and selection
                 strategies are designed to optimize the fuzzing
                 procedure. For evaluation, we implement Polar on top of
                 two popular fuzzers --- AFL and AFLFast, and conduct
                 experiments on several widely used ICS protocols such
                 as Modbus, IEC104, and IEC 61850. Results show that,
                 compared with AFL and AFLFast, Polar achieves the same
                 code coverage and bug detection numbers at the speed of
                 1.5X--12X. It also gains increase with 0\%--91\% more
                 paths within 24 hours. Furthermore, Polar has exposed
                 10 previously unknown vulnerabilities in those
                 protocols, 6 of which have been assigned unique CVE
                 identifiers in the US National Vulnerability
                 Database.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "93",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sun:2019:STC,
  author =       "Youcheng Sun and Xiaowei Huang and Daniel Kroening and
                 James Sharp and Matthew Hill and Rob Ashmore",
  title =        "Structural Test Coverage Criteria for Deep Neural
                 Networks",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "94:1--94:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358233",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358233",
  abstract =     "Deep neural networks (DNNs) have a wide range of
                 applications, and software employing them must be
                 thoroughly tested, especially in safety-critical
                 domains. However, traditional software test coverage
                 metrics cannot be applied directly to DNNs. In this
                 paper, inspired by the MC/DC coverage criterion, we
                 propose a family of four novel test coverage criteria
                 that are tailored to structural features of DNNs and
                 their semantics. We validate the criteria by
                 demonstrating that test inputs that are generated with
                 guidance by our proposed coverage criteria are able to
                 capture undesired behaviours in a DNN. Test cases are
                 generated using a symbolic approach and a
                 gradient-based heuristic search. By comparing them with
                 existing methods, we show that our criteria achieve a
                 balance between their ability to find bugs (proxied
                 using adversarial examples and correlation with
                 functional coverage) and the computational cost of test
                 input generation. Our experiments are conducted on
                 state-of-the-art DNNs obtained using popular open
                 source datasets, including MNIST, CIFAR-10 and
                 ImageNet.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "94",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lin:2019:GBM,
  author =       "Yi-Ting Lin and Hsiang Hsu and Shang-Chien Lin and
                 Chung-Wei Lin and Iris Hui-Ru Jiang and Changliu Liu",
  title =        "Graph-Based Modeling, Scheduling, and Verification for
                 Intersection Management of Intelligent Vehicles",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "95:1--95:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358221",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358221",
  abstract =     "Intersection management is one of the most
                 representative applications of intelligent vehicles
                 with connected and autonomous functions. The
                 connectivity provides environmental information that a
                 single vehicle cannot sense, and the autonomy supports
                 precise vehicular control that a human driver cannot
                 achieve. Intersection management solves the fundamental
                 conflict resolution problem for vehicles --- two
                 vehicles should not appear at the same location at the
                 same time, and, if they intend to do that, an order
                 should be decided to optimize certain objectives such
                 as the traffic throughput or smoothness. In this paper,
                 we first propose a graph-based model for intersection
                 management. The model is general and applicable to
                 different granularities of intersections and other
                 conflicting scenarios. We then derive formal
                 verification approaches which can guarantee
                 deadlock-freeness. Based on the graph-based model and
                 the verification approaches, we develop a centralized
                 cycle removal algorithm for the graph-based model to
                 schedule vehicles to go through the intersection safely
                 (without collisions) and efficiently without deadlocks.
                 Experimental results demonstrate the expressiveness of
                 the proposed model and the effectiveness and efficiency
                 of the proposed algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "95",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kyriakis:2019:SMR,
  author =       "Panagiotis Kyriakis and Jyotirmoy V. Deshmukh and Paul
                 Bogdan",
  title =        "Specification Mining and Robust Design under
                 Uncertainty: a Stochastic Temporal Logic Approach",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "96:1--96:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358231",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358231",
  abstract =     "In this paper, we propose Stochastic Temporal Logic
                 (StTL) as a formalism for expressing probabilistic
                 specifications on time-varying behaviors of controlled
                 stochastic dynamical systems. To make StTL a more
                 effective specification formalism, we introduce the
                 quantitative semantics for StTL to reason about the
                 robust satisfaction of an StTL specification by a given
                 system. Additionally, we propose using the robustness
                 value as the objective function to be maximized by a
                 stochastic optimization algorithm for the purpose of
                 controller design. Finally, we formulate an algorithm
                 for parameter inference for Parametric-StTL
                 specifications, which allows specifications to be mined
                 from output traces of the underlying system. We
                 demonstrate and validate our framework on two case
                 studies inspired by the automotive domain.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "96",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ghosh:2019:RRS,
  author =       "Bineet Ghosh and Parasara Sridhar Duggirala",
  title =        "Robust Reachable Set: Accounting for Uncertainties in
                 Linear Dynamical Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "97:1--97:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358229",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358229",
  abstract =     "Reachable set computation is one of the primary
                 techniques for safety verification of linear dynamical
                 systems. In reality the underlying dynamics have
                 uncertainties like parameter variations or modeling
                 uncertainties. Therefore, the reachable set computation
                 must consider the uncertainties in the dynamics to be
                 useful, i.e., the computed reachable set should be over
                 or under approximation if not exact. This paper
                 presents a technique to compute reachable set of linear
                 dynamical systems with uncertainties. First, we
                 introduce a construct called support of a matrix. Using
                 this construct, we present a set of sufficient
                 conditions for which reachable set for uncertain linear
                 system can be computed efficiently; and safety
                 verification can be performed using bi-linear
                 programming. Finally, given a linear dynamical system,
                 we compute robust reachable set, which accounts for all
                 possible uncertainties that can be handled by the
                 sufficient conditions presented. Experimental
                 evaluation on benchmarks reveals that our algorithm is
                 computationally very efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "97",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lal:2019:CGA,
  author =       "Ratan Lal and Pavithra Prabhakar",
  title =        "Counterexample Guided Abstraction Refinement for
                 Polyhedral Probabilistic Hybrid Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "98:1--98:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358217",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358217",
  abstract =     "We consider the problem of safety analysis of
                 probabilistic hybrid systems, which capture discrete,
                 continuous and probabilistic behaviors. We present a
                 novel counterexample guided abstraction refinement
                 (CEGAR) algorithm for a subclass of probabilistic
                 hybrid systems, called polyhedral probabilistic hybrid
                 systems (PHS), where the continuous dynamics is
                 specified using a polyhedral set within which the
                 derivatives of the continuous executions lie.
                 Developing a CEGAR algorithm for PHS is complex owing
                 to the branching behavior due to the probabilistic
                 transitions, and the infinite state space due to the
                 real-valued variables. We present a practical algorithm
                 by choosing a succinct representation for
                 counterexamples, an efficient validation algorithm and
                 a constructive method for refinement that ensures
                 progress towards the elimination of a spurious abstract
                 counterexample. The technical details for refinement
                 are non-trivial since there are no clear disjoint sets
                 for separation. We have implemented our algorithm in a
                 Python toolbox called Procegar; our experimental
                 analysis demonstrates the benefits of our method in
                 terms of successful verification results, as well as
                 bug finding.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "98",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Duggirala:2019:ASR,
  author =       "Parasara Sridhar Duggirala and Stanley Bak",
  title =        "Aggregation Strategies in Reachable Set Computation of
                 Hybrid Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "99:1--99:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358214",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358214",
  abstract =     "Computing the set of reachable states is a widely used
                 technique for proving that a hybrid system satisfies
                 its safety specification. Flow-pipe construction
                 methods interleave phases of computing continuous
                 successors and phases of computing discrete successors.
                 Directly doing this leads to a combinatorial explosion
                 problem, though, as with each discrete successor there
                 may be an interval of time where the transition can
                 occur, so that the number of paths becomes exponential
                 in the number of discrete transitions. For this reason,
                 most reachable set computation tools implement some
                 form of set aggregation for discrete transitions, such
                 as, performing a template-based overapproximation or
                 convex hull aggregation. These aggregation methods,
                 however, in theory can lead to unbounded error, and in
                 practice are often the root cause of why a safety
                 specification cannot be proven. This paper proposes
                 techniques for improving the accuracy of the
                 aggregation operations performed for reachable set
                 computation. First, we present two aggregation
                 strategies over generalized stars, namely convex hull
                 aggregation and template based aggregation. Second, we
                 perform adaptive deaggregation using a data structure
                 called Aggregated Directed Acyclic Graph (AGGDAG). Our
                 deaggregation strategy is driven by counterexamples and
                 hence has soundness and relative completeness
                 guarantees. We demonstrate the computational benefits
                 of our approach through two case studies involving
                 satellite rendezvous and gearbox meshing.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "99",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Salamati:2019:MEM,
  author =       "Mahmoud Salamati and Rocco Salvia and Eva Darulova and
                 Sadegh Soudjani and Rupak Majumdar",
  title =        "Memory-Efficient Mixed-Precision Implementations for
                 Robust Explicit Model Predictive Control",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "100:1--100:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358223",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358223",
  abstract =     "We propose an optimization for space-efficient
                 implementations of explicit model-predictive
                 controllers (MPC) for robust control of linear
                 time-invariant (LTI) systems on embedded platforms. We
                 obtain an explicit-form robust model-predictive
                 controller as a solution to a multi-parametric linear
                 programming problem. The structure of the controller is
                 a polyhedral decomposition of the control domain, with
                 an affine map for each domain. While explicit MPC is
                 suited for embedded devices with low computational
                 power, the memory requirements for such controllers can
                 be high. We provide an optimization algorithm for a
                 mixed-precision implementation of the controller, where
                 the deviation of the implemented controller from the
                 original one is within the robustness margin of the
                 robust control problem. The core of the mixed-precision
                 optimization is an iterative static analysis that
                 co-designs a robust controller and a low-bitwidth
                 approximation that is statically guaranteed to always
                 be within the robustness margin of the original
                 controller. We have implemented our algorithm and show
                 on a set of benchmarks that our optimization can reduce
                 space requirements by up to 20.9\% and on average by
                 12.6\% compared to a minimal uniform precision
                 implementation of the original controller.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "100",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Arrestier:2019:NRD,
  author =       "Florian Arrestier and Karol Desnos and Eduardo Juarez
                 and Daniel Menard",
  title =        "Numerical Representation of Directed Acyclic Graphs
                 for Efficient Dataflow Embedded Resource Allocation",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "101:1--101:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358225",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358225",
  abstract =     "Stream processing applications running on
                 Heterogeneous Multi-Processor Systems on Chips
                 (HMPSoCs) require efficient resource allocation and
                 management, both at compile-time and at runtime. To
                 cope with modern adaptive applications whose behavior
                 can not be exhaustively predicted at compile-time,
                 runtime managers must be able to take resource
                 allocation decisions on-the-fly, with a minimum
                 overhead on application performance. Resource
                 allocation algorithms often rely on an internal
                 modeling of an application. Directed Acyclic Graphs
                 (DAGs) are the most commonly used models for capturing
                 control and data dependencies between tasks. DAGs are
                 notably often used as an intermediate representation
                 for deploying applications modeled with a dataflow
                 Model of Computation (MoC) on HMPSoCs. Building such
                 intermediate representation at runtime for massively
                 parallel applications is costly both in terms of
                 computation and memory overhead. In this paper, an
                 intermediate representation of DAGs for resource
                 allocation is presented. This new representation shows
                 improved performance for run-time analysis of dataflow
                 graphs with less overhead in both computation time and
                 memory footprint. The performances of the proposed
                 representation are evaluated on a set of computer
                 vision and machine learning applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "101",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% TECS volume 18, number 5s, article 102.
@article{Ziegler:2019:HSE,
  author = {Andreas Ziegler and Julian Geus and Bernhard Heinloth
    and Timo H{\"o}nig and Daniel Lohmann},
  title = {{Honey}, {I} Shrunk the {ELFs}: Lightweight Binary
    Tailoring of Shared Libraries},
  journal = j-TECS,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  volume = {18},
  number = {5s},
  articleno = {102},
  pages = {102:1--102:??},
  month = oct,
  year = {2019},
  doi = {https://doi.org/10.1145/3358222},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3358222},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {In the embedded domain, industrial sectors (i.e.,
    automotive industry, avionics) are undergoing radical
    changes. They broadly adopt commodity hardware and move
    away from special-purpose control units. During this
    transition, heterogeneous software components are
    consolidated to run on commodity operating systems. To
    efficiently consolidate such components, a modular
    encapsulation of common functionality into reusable
    binary files (i.e., shared libraries) is essential.
    However, shared libraries are often unnecessarily large
    as they entail a lot of generic functionality that is
    not required in a narrowly defined scenario. As the
    source code of proprietary components is often
    unavailable and the industry is heading towards
    binary-only distribution, we propose an approach
    towards lightweight binary tailoring. As demonstrated
    in the evaluation, lightweight binary tailoring
    effectively reduces the amount of code in all shared
    libraries on a Linux-based system by 63 percent and
    shrinks their files by 17 percent. The reduction in
    size is beneficial to cut down costs (e.g., lower
    storage and memory footprint) and eases code analyses
    that are necessary for code audits.},
}

%%% TECS volume 18, number 5s, article 103.
@article{Pan:2019:MTP,
  author = {Runyu Pan and Gabriel Parmer},
  title = {{MxU}: Towards Predictable, Flexible, and Efficient
    Memory Access Control for the Secure {IoT}},
  journal = j-TECS,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  volume = {18},
  number = {5s},
  articleno = {103},
  pages = {103:1--103:??},
  month = oct,
  year = {2019},
  doi = {https://doi.org/10.1145/3358224},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3358224},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {The advanced functionality requirements of modern
    embedded and Internet of Things (IoT) devices --- from
    autonomous vehicles, to city and power-grid management
    --- are driving an ever-increasing software complexity.
    At the same time, the pervasive internet connections of
    these systems necessitate the fundamental design of
    security into these devices. The isolation of complex
    features from those that are critical through
    protection domains is an effective means to constrain
    the scope of faults and security breaches. Common
    hardware-provided memory facilities to enforce
    protection domains through memory access control ---
    including Memory Management Units (MMUs) usually found
    in microprocessors, and Memory Protection Units (MPUs)
    usually found in microcontrollers --- must meet the
    goals of enabling flexible, efficient and dynamic
    management of memory, and must enable tight bounds on
    the worst-case execution of critical code.
    Unfortunately, current system memory management
    facilities are ill-prepared to handle this challenge:
    MMUs that use extensive caches to achieve strong
    average-case performance suffer from debilitating
    worst-case and even average-case behavior under hefty
    interference, while MPUs struggle to provide flexible
    memory management. This paper details MxU, a memory
    protection and allocation abstraction that integrates
    temporal specifications into the memory management
    subsystem, to enable portable code to achieve both
    predictable, tightly-bounded execution and dynamic
    management across both MMU- and MPU-based systems. We
    implement MxU in the Composite microkernel, and
    evaluate its flexibility and predictability over two
    different architectures: a MPU-based Cortex-M7
    microcontroller and a MMU-based Cortex-A9
    microprocessor using a suite of modern applications
    including neural network-based inference, SQLite, and a
    javascript runtime. For MMU-based systems, MxU reduces
    application TLB stall by up to 68.0\%. For MPU-based
    systems, MxU enables flexible dynamic memory management
    often with application overheads of 1\%, increasing to
    6.1\% under significant interference.},
}

%%% TECS volume 18, number 5s, article 104.
@article{Yim:2019:TFS,
  author = {Keun Soo Yim and Iliyan Malchev and Andrew Hsieh and
    Dave Burke},
  title = {{Treble}: Fast Software Updates by Creating an
    Equilibrium in an Active Software Ecosystem of Globally
    Distributed Stakeholders},
  journal = j-TECS,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  volume = {18},
  number = {5s},
  articleno = {104},
  pages = {104:1--104:??},
  month = oct,
  year = {2019},
  doi = {https://doi.org/10.1145/3358237},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3358237},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/java2010.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {This paper presents our experience with Treble, a
    two-year initiative to build the modular base in
    Android, a Java-based mobile platform running on the
    Linux kernel. Our Treble architecture splits the
    hardware independent core framework written in Java
    from the hardware dependent vendor implementations
    (e.g., user space device drivers, vendor native
    libraries, and kernel written in C/C++). Cross-layer
    communications between them are done via versioned,
    stable inter-process communication interfaces whose
    backward compatibility is tested by using two API
    compliance suites. Based on this architecture, we
    repackage the key Android software components that
    suffered from crucial post-launch security bugs as
    separate images. That not only enables separate
    ownerships but also independent updates of each image
    by interested ecosystem entities. We discuss our
    experience of delivering Treble architectural changes
    to silicon vendors and device makers using a yearly
    release model. Our experiments and industry rollouts
    support our hypothesis that giving more freedom to all
    ecosystem entities and creating an equilibrium are a
    transformation necessary to further scale the world
    largest open source ecosystem with over two billion
    active devices.},
}

%%% The third author's compound family name ``Manzanas Lopez'' is braced so
%%% that BibTeX treats it as a single surname (given name: Diego).
@Article{Tran:2019:SVC,
  author =       "Hoang-Dung Tran and Feiyang Cai and Diego
                 {Manzanas Lopez} and Patrick Musau and Taylor T.
                 Johnson and Xenofon Koutsoukos",
  title =        "Safety Verification of Cyber-Physical Systems with
                 Reinforcement Learning Control",
  journal =      j-TECS,
  volume =       "18",
  number =       "5s",
  pages =        "105:1--105:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358230",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:44 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358230",
  abstract =     "This paper proposes a new forward reachability
                 analysis approach to verify safety of cyber-physical
                 systems (CPS) with reinforcement learning controllers.
                 The foundation of our approach lies on two efficient,
                 exact and over-approximate reachability algorithms for
                 neural network control systems using star sets, which
                 is an efficient representation of polyhedra. Using
                 these algorithms, we determine the initial conditions
                 for which a safety-critical system with a neural
                 network controller is safe by incrementally searching a
                 critical initial condition where the safety of the
                 system cannot be established. Our approach produces
                 tight over-approximation error and it is
                 computationally efficient, which allows the application
                 to practical CPS with learning enable components
                 (LECs). We implement our approach in NNV, a recent
                 verification tool for neural networks and neural
                 network control systems, and evaluate its advantages
                 and applicability by verifying safety of a practical
                 Advanced Emergency Braking System (AEBS) with a
                 reinforcement learning (RL) controller trained using
                 the deep deterministic policy gradient (DDPG) method.
                 The experimental results show that our new reachability
                 algorithms are much less conservative than existing
                 polyhedra-based approaches. We successfully determine
                 the entire region of the initial conditions of the AEBS
                 with the RL controller such that the safety of the
                 system is guaranteed, while a polyhedra-based approach
                 cannot prove the safety properties of the system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "105",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% TECS volume 18, number 5s, article 106.
@article{Huang:2019:RRA,
  author = {Chao Huang and Jiameng Fan and Wenchao Li and Xin Chen
    and Qi Zhu},
  title = {{ReachNN}: Reachability Analysis of Neural-Network
    Controlled Systems},
  journal = j-TECS,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  volume = {18},
  number = {5s},
  articleno = {106},
  pages = {106:1--106:??},
  month = oct,
  year = {2019},
  doi = {https://doi.org/10.1145/3358228},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3358228},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Applying neural networks as controllers in dynamical
    systems has shown great promises. However, it is
    critical yet challenging to verify the safety of such
    control systems with neural-network controllers in the
    loop. Previous methods for verifying neural network
    controlled systems are limited to a few specific
    activation functions. In this work, we propose a new
    reachability analysis approach based on Bernstein
    polynomials that can verify neural-network controlled
    systems with a more general form of activation
    functions, i.e., as long as they ensure that the neural
    networks are Lipschitz continuous. Specifically, we
    consider abstracting feedforward neural networks with
    Bernstein polynomials for a small subset of inputs. To
    quantify the error introduced by abstraction, we
    provide both theoretical error bound estimation based
    on the theory of Bernstein polynomials and more
    practical sampling based error bound estimation,
    following a tight Lipschitz constant estimation
    approach based on forward reachability analysis.
    Compared with previous methods, our approach addresses
    a much broader set of neural networks, including
    heterogeneous neural networks that contain multiple
    types of activation functions. Experiment results on a
    variety of benchmarks show the effectiveness of our
    approach.},
}

%%% TECS volume 18, number 5s, article 107.
@article{Yaghoubi:2019:WCS,
  author = {Shakiba Yaghoubi and Georgios Fainekos},
  title = {Worst-case Satisfaction of {STL} Specifications Using
    Feedforward Neural Network Controllers: a {Lagrange}
    Multipliers Approach},
  journal = j-TECS,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  volume = {18},
  number = {5s},
  articleno = {107},
  pages = {107:1--107:??},
  month = oct,
  year = {2019},
  doi = {https://doi.org/10.1145/3358239},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3358239},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {In this paper, a reinforcement learning approach for
    designing feedback neural network controllers for
    nonlinear systems is proposed. Given a Signal Temporal
    Logic (STL) specification which needs to be satisfied
    by the system over a set of initial conditions, the
    neural network parameters are tuned in order to
    maximize the satisfaction of the STL formula. The
    framework is based on a max-min formulation of the
    robustness of the STL formula. The maximization is
    solved through a Lagrange multipliers method, while the
    minimization corresponds to a falsification problem. We
    present our results on a vehicle and a quadrotor model
    and demonstrate that our approach reduces the training
    time more than 50 percent compared to the baseline
    approach.},
}

%%% TECS volume 18, number 5s, article 108.
@article{Oehlert:2019:CIT,
  author = {Dominic Oehlert and Selma Saidi and Heiko Falk},
  title = {Code-Inherent Traffic Shaping for Hard Real-Time
    Systems},
  journal = j-TECS,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  volume = {18},
  number = {5s},
  articleno = {108},
  pages = {108:1--108:??},
  month = oct,
  year = {2019},
  doi = {https://doi.org/10.1145/3358215},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3358215},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Modern hard real-time systems evolved from isolated
    single-core architectures to complex multi-core
    architectures which are often connected in a
    distributed manner. With the increasing influence of
    interconnections in hard real-time systems, the access
    behavior to shared resources of single tasks or cores
    becomes a crucial factor for the system's overall
    worst-case timing properties. Traffic shaping is a
    powerful technique to decrease contention in a network
    and deliver guarantees on network streams. In this
    paper we present a novel approach to automatically
    integrate a traffic shaping behavior into the code of a
    program for different traffic shaping profiles while
    being as least invasive as possible. As this approach
    is solely depending on modifying programs on a
    code-level, it does not rely on any additional hardware
    or operating system-based functions. We show how
    different traffic shaping profiles can be implemented
    into programs using a greedy heuristic and an
    evolutionary algorithm, as well as their influences on
    the modified programs. It is demonstrated that the
    presented approaches can be used to decrease worst-case
    execution times in multi-core systems and lower buffer
    requirements in distributed systems.},
}

%%% TECS volume 18, number 5s, article 109.
@article{Awan:2019:TAM,
  author = {Muhammad Ali Awan and Konstantinos Bletsas and Pedro
    F. Souto and Benny Akesson and Eduardo Tovar},
  title = {Techniques and Analysis for Mixed-criticality
    Scheduling with Mode-dependent Server Execution
    Budgets},
  journal = j-TECS,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  volume = {18},
  number = {5s},
  articleno = {109},
  pages = {109:1--109:??},
  month = oct,
  year = {2019},
  doi = {https://doi.org/10.1145/3358234},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3358234},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {In mixed-criticality systems, tasks of different
    criticality share system resources, mainly to reduce
    cost. Cost is further reduced by using adaptive
    mode-based scheduling arrangements, such as Vestal's
    model, to improve resource efficiency, while
    guaranteeing schedulability of critical functionality.
    To simplify safety certification, servers are often
    used to provide temporal isolation between tasks. In
    its simplest form, a server is a periodically recurring
    time window, in which some tasks are scheduled. A
    server's computational requirements may greatly vary in
    different modes, although state-of-the-art techniques
    and schedulability tests do not allow different budgets
    to be used by a server in different modes. This results
    in a single conservative execution budget for all
    modes, increasing system cost. The goal of this paper
    is to reduce the cost of mixed-criticality systems
    through three main contributions: (i) a scheduling
    arrangement for uniprocessor systems employing
    fixed-priority scheduling within periodic servers,
    whose budgets are dynamically adjusted at run-time in
    the event of a mode change, (ii) a new schedulability
    analysis for such systems, and (iii) heuristic
    algorithms for assigning budgets to servers in
    different modes and ordering the execution of the
    servers. Experiments with synthetic task sets
    demonstrate considerable improvements (up to 52.8\%) in
    scheduling success ratio when using dynamic server
    budgets vs. static ``one-size-fits-all-modes''
    budgets.},
}

%%% TECS volume 18, number 5s, article 110.
@article{VanPinxten:2019:PSC,
  author = {Joost {Van Pinxten} and Marc Geilen and Twan Basten},
  title = {Parametric Scheduler Characterization},
  journal = j-TECS,
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {https://dl.acm.org/loi/tecs},
  volume = {18},
  number = {5s},
  articleno = {110},
  pages = {110:1--110:??},
  month = oct,
  year = {2019},
  doi = {https://doi.org/10.1145/3358226},
  url = {https://dl.acm.org/ft_gateway.cfm?id=3358226},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  bibdate = {Thu Oct 17 18:16:44 MDT 2019},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  abstract = {Schedulers assign starting times to events in a system
    such that a set of constraints is met and system
    productivity is maximized. We characterize the
    scheduler behaviour for the case where decisions are
    made by comparing affine expressions of design
    parameters such as task workload, processing speed,
    robot travelling speed, or a controller's rise and
    settling time. Deterministic schedulers can be extended
    with symbolic execution, to keep track of the affine
    conditions on the parameters for which the scheduling
    decisions are made. We introduce a divide-and-conquer
    algorithm that uses this information to determine
    parameter regions for which the same sequence of
    decisions is taken given a particular scenario. The
    results provide designers insight in the impact of
    parameter changes on the performance of their system.
    The exploration can also be executed with the KLEE
    symbolic execution engine of the LLVM tool chain to
    extract the same results. We show that the
    divide-and-conquer approach provides the results much
    faster than the generic symbolic execution engine of
    KLEE. The results allow visualization of the
    sensitivity to all parameter combinations. The results
    of our approach therefore provide more insight in the
    sensitivity to parameters.},
}

%%% Pages carry the article-number prefix (112:first--112:last) so that styles
%%% which ignore the articleno field still print an unambiguous locator.
@Article{Shukla:2020:EEC,
  author =       "Sandeep K. Shukla",
  title =        "Editorial: Embedded Computing and Society",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "112:1--112:3",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3368250",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3368250",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "112",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Pages carry the article-number prefix (113:first--113:last) so that styles
%%% which ignore the articleno field still print an unambiguous locator.
@Article{Jain:2020:CHS,
  author =       "Shubham Jain and Anand Raghunathan",
  title =        "{CxDNN}: Hardware-software Compensation Methods for
                 Deep Neural Networks on Resistive Crossbar Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "113:1--113:23",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362035",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362035",
  abstract =     "Resistive crossbars have shown strong potential as the
                 building blocks of future neural fabrics, due to their
                 ability to natively execute vector-matrix
                 multiplication (the dominant computational kernel in
                 DNNs). However, a key challenge that arises in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "113",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Pages carry the article-number prefix (114:first--114:last) so that styles
%%% which ignore the articleno field still print an unambiguous locator.
@Article{Tiku:2020:OSV,
  author =       "Saideep Tiku and Sudeep Pasricha",
  title =        "Overcoming Security Vulnerabilities in Deep
                 Learning-based Indoor Localization Frameworks on Mobile
                 Devices",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "114:1--114:24",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362036",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362036",
  abstract =     "Indoor localization is an emerging application domain
                 for the navigation and tracking of people and assets.
                 Ubiquitously available Wi-Fi signals have enabled
                 low-cost fingerprinting-based localization solutions.
                 Further, the rapid growth in mobile \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "114",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Pages carry the article-number prefix (115:first--115:last) so that styles
%%% which ignore the articleno field still print an unambiguous locator.
@Article{Tiwari:2020:RRA,
  author =       "Sakshi Tiwari and Shreshth Tuli and Isaar Ahmad and
                 Ayushi Agarwal and Preeti Ranjan Panda and Sreenivas
                 Subramoney",
  title =        "{REAL}: {REquest} Arbitration in Last Level Caches",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "115:1--115:24",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362100",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362100",
  abstract =     "Shared last level caches (LLC) of multicore
                 systems-on-chip are subject to a significant amount of
                 contention over a limited bandwidth, resulting in major
                 performance bottlenecks that make the issue a
                 first-order concern in modern multiprocessor
                 systems-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "115",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Pages carry the article-number prefix (116:first--116:last) so that styles
%%% which ignore the articleno field still print an unambiguous locator.
@Article{Sood:2020:RDV,
  author =       "Surinder Sood and Avinash Malik and Partha Roop",
  title =        "Robust Design and Validation of Cyber-physical
                 Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "116:1--116:21",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362098",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362098",
  abstract =     "Co-simulation--based validation of hardware
                 controllers adjoined with plant models, with continuous
                 dynamics, is an important step in model-based design of
                 controllers for Cyber-physical Systems (CPS).
                 Co-simulation suffers from many problems, such as
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "116",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% Pages carry the article-number prefix (117:first--117:last) so that styles
%%% which ignore the articleno field still print an unambiguous locator.
@Article{Zhou:2020:BBT,
  author =       "Jia Zhou and Prachi Joshi and Haibo Zeng and Renfa
                 Li",
  title =        "{BTMonitor}: Bit-time-based Intrusion Detection and
                 Attacker Identification in Controller Area Network",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "117:1--117:23",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362034",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362034",
  abstract =     "With the rapid growth of connectivity and autonomy for
                 today's automobiles, their security vulnerabilities are
                 becoming one of the most urgent concerns in the
                 automotive industry. The lack of message authentication
                 in Controller Area Network (CAN), \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "117",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2020:HSC,
  author =       "Mengquan Li and Weichen Liu and Nan Guan and Yiyuan
                 Xie and Yaoyao Ye",
  title =        "Hardware-Software Collaborative Thermal Sensing in
                 Optical Network-on-Chip--based Manycore Systems",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "118:1--118:24",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362099",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362099",
  abstract =     "Continuous technology scaling in manycore systems
                 leads to severe overheating issues. To guarantee system
                 reliability, it is critical to accurately yet
                 efficiently monitor runtime temperature distribution
                 for effective chip thermal management. As an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "118",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Neshatpour:2020:IIC,
  author =       "Katayoun Neshatpour and Houman Homayoun and Avesta
                 Sasan",
  title =        "{ICNN}: The Iterative Convolutional Neural Network",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "119:1--119:27",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3355553",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3355553",
  abstract =     "Modern and recent architectures of vision-based
                 Convolutional Neural Networks (CNN) have improved
                 detection and prediction accuracy significantly.
                 However, these algorithms are extremely computationally
                 intensive. To break the power and performance wall
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "119",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cedersjo:2020:TFC,
  author =       "Gustav Cedersj{\"o} and J{\"o}rn W. Janneck",
  title =        "{T{\"y}cho}: a Framework for Compiling Stream
                 Programs",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "120:1--120:25",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362692",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362692",
  abstract =     "Many application areas for embedded systems, such as
                 DSP, media coding, and image processing, are based on
                 stream processing. Stream programs in these areas are
                 often naturally described as graphs, where nodes are
                 computational kernels that send data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "120",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hammadeh:2020:WHR,
  author =       "Zain A. H. Hammadeh and Sophie Quinton and Rolf
                 Ernst",
  title =        "Weakly-hard Real-time Guarantees for Earliest Deadline
                 First Scheduling of Independent Tasks",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "121:1--121:25",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3356865",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3356865",
  abstract =     "The current trend in modeling and analyzing real-time
                 systems is toward tighter yet safe timing constraints.
                 Many practical real-time systems can de facto sustain a
                 bounded number of deadline-misses, i.e., they have
                 Weakly-Hard Real-Time (WHRT) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "121",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Krishnakumar:2020:APL,
  author =       "Gnanambikai Krishnakumar and Kommuru Alekhya Reddy and
                 Chester Rebeiro",
  title =        "{ALEXIA}: a Processor with Lightweight Extensions for
                 Memory Safety",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "122:1--122:27",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362064",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362064",
  abstract =     "Illegal use of memory pointers is a serious security
                 vulnerability. A large number of malwares exploit the
                 spatial and temporal nature of these vulnerabilities to
                 subvert execution or glean sensitive data from an
                 application. Recent countermeasures \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "122",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yan:2020:TCH,
  author =       "Kaige Yan and Jingweijia Tan and Longjun Liu and
                 Xingyao Zhang and Stanko R. Brankovic and Jinghong Chen
                 and Xin Fu",
  title =        "Toward Customized Hybrid Fuel-Cell and Battery-powered
                 Mobile Device for Individual Users",
  journal =      j-TECS,
  volume =       "18",
  number =       "6",
  pages =        "123:1--123:20",
  month =        jan,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362033",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 23 06:51:29 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362033",
  abstract =     "Rapidly evolving technologies and applications of
                 mobile devices inevitably increase the power demands on
                 the battery. However, the development of batteries can
                 hardly keep pace with the fast-growing demands, leading
                 to short battery life, which becomes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "123",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Han:2020:BAP,
  author          = {Jian-Jun Han and Sunlu Gong and Zhenjiang Wang and
                     Wen Cai and Dakai Zhu and Laurence T. Yang},
  title           = {Blocking-Aware Partitioned Real-Time Scheduling for
                     Uniform Heterogeneous Multicore Platforms},
  journal         = j-TECS,
  volume          = {19},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:25},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3366683},
  url             = {https://dl.acm.org/doi/abs/10.1145/3366683},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Heterogeneous multicore processors have recently become
                     de facto computing engines for state-of-the-art embedded
                     applications. Nonetheless, very little research focuses
                     on the scheduling of periodic (implicit-deadline)
                     real-time tasks upon heterogeneous \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 15 07:25:13 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Marco:2020:ODL,
  author          = {Vicent Sanz Marco and Ben Taylor and Zheng Wang and
                     Yehia Elkhatib},
  title           = {Optimizing Deep Learning Inference on Embedded Systems
                     Through Adaptive Model Selection},
  journal         = j-TECS,
  volume          = {19},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:28},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3371154},
  url             = {https://dl.acm.org/doi/abs/10.1145/3371154},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Deep neural networks (DNNs) are becoming a key enabling
                     technique for many application domains. However,
                     on-device inference on battery-powered,
                     resource-constrained embedding systems is often
                     infeasible due to prohibitively long inferencing time
                     and \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 15 07:25:13 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Azari:2020:ETO,
  author          = {Elham Azari and Sarma Vrudhula},
  title           = {{ELSA}: a Throughput-Optimized Design of an {LSTM}
                     Accelerator for Energy-Constrained Devices},
  journal         = j-TECS,
  volume          = {19},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:21},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3366634},
  url             = {https://dl.acm.org/doi/abs/10.1145/3366634},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {The next significant step in the evolution and
                     proliferation of artificial intelligence technology will
                     be the integration of neural network (NN) models within
                     embedded and mobile systems. This calls for the design
                     of compact, energy efficient NN models \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 15 07:25:13 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Jayakodi:2020:DOE,
  author          = {Nitthilan Kanappan Jayakodi and Syrine Belakaria and
                     Aryan Deshwal and Janardhan Rao Doppa},
  title           = {Design and Optimization of Energy-Accuracy Tradeoff
                     Networks for Mobile Platforms via Pretrained Deep
                     Models},
  journal         = j-TECS,
  volume          = {19},
  number          = {1},
  articleno       = {4},
  pages           = {4:1--4:24},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3366636},
  url             = {https://dl.acm.org/doi/abs/10.1145/3366636},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Many real-world edge applications including object
                     detection, robotics, and smart health are enabled by
                     deploying deep neural networks (DNNs) on
                     energy-constrained mobile platforms. In this article, we
                     propose a novel approach to trade off energy and
                     \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 15 07:25:13 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Romaszkan:2020:PPP,
  author          = {Wojciech Romaszkan and Tianmu Li and Puneet Gupta},
  title           = {{3PXNet}: Pruned-Permuted-Packed {XNOR} Networks for
                     Edge Machine Learning},
  journal         = j-TECS,
  volume          = {19},
  number          = {1},
  articleno       = {5},
  pages           = {5:1--5:23},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3371157},
  url             = {https://dl.acm.org/doi/abs/10.1145/3371157},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {As the adoption of Neural Networks continues to
                     proliferate different classes of applications and
                     systems, edge devices have been left behind. Their
                     strict energy and storage limitations make them unable
                     to cope with the sizes of common network models.
                     \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 15 07:25:13 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Lang:2020:DIE,
  author          = {Clemens Lang and Isabella Stilkerich},
  title           = {Design and Implementation of an Escape Analysis in the
                     Context of Safety-Critical Embedded Systems},
  journal         = j-TECS,
  volume          = {19},
  number          = {1},
  articleno       = {6},
  pages           = {6:1--6:20},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3372133},
  url             = {https://dl.acm.org/doi/abs/10.1145/3372133},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {The use of a managed, type-safe language such as
                     Standard ML, Ada Ravenscar, or Java in hard real-time
                     and embedded systems offers productivity, safety, and
                     dependability benefits at a reasonable cost. Static
                     software systems, that is systems in which \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 15 07:25:13 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{He:2020:BCL,
  author          = {Wenjian He and Sanjeev Das and Wei Zhang and Yang Liu},
  title           = {{BBB-CFI}: Lightweight {CFI} Approach Against
                     Code-Reuse Attacks Using Basic Block Information},
  journal         = j-TECS,
  volume          = {19},
  number          = {1},
  articleno       = {7},
  pages           = {7:1--7:22},
  month           = feb,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3371151},
  url             = {https://dl.acm.org/doi/abs/10.1145/3371151},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Code-reuse attack is a concrete threat to computing
                     systems because it can evade conventional security
                     defenses. Control flow integrity (CFI) is proposed to
                     repel this threat. However, former implementations of
                     CFI suffer from two major drawbacks: \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 15 07:25:13 MST 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@Article{Lizarraga:2020:AMB,
  author =       "Adrian Lizarraga and Jonathan Sprinkle and Roman
                 Lysecky",
  title =        "Automated Model-Based Optimization of Data-Adaptable
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "19",
  number =       "1",
  pages =        "8:1--8:22",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3372142",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 15 07:25:13 MST 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3372142",
  abstract =     "Dynamic data-driven applications such as object
                 tracking, surveillance, and other sensing and decision
                 applications are largely dependent on the
                 characteristics of the data streams on which they
                 operate. The underlying models and algorithms of data-
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Ghosh:2020:PGI,
  author          = {Sumana Ghosh and Soumyajit Dey and Pallab Dasgupta},
  title           = {Pattern Guided Integrated Scheduling and Routing in
                     Multi-Hop Control Networks},
  journal         = j-TECS,
  volume          = {19},
  number          = {2},
  articleno       = {9},
  pages           = {9:1--9:28},
  month           = mar,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3372134},
  url             = {https://dl.acm.org/doi/abs/10.1145/3372134},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Executing a set of control loops over a shared multi-hop
                     (wireless) control network (MCN) requires careful
                     co-scheduling of the control tasks and the routing of
                     sensory/actuation messages over the MCN. In this work,
                     we establish pattern guided aperiodic \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Chen:2020:QEO,
  author          = {Fupeng Chen and Heng Yu and Yajun Ha},
  title           = {Quality Estimation and Optimization of Adaptive Stereo
                     Matching Algorithms for Smart Vehicles},
  journal         = j-TECS,
  volume          = {19},
  number          = {2},
  articleno       = {10},
  pages           = {10:1--10:24},
  month           = mar,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3372784},
  url             = {https://dl.acm.org/doi/abs/10.1145/3372784},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Stereo matching is a promising approach for smart
                     vehicles to find the depth of nearby objects.
                     Transforming a traditional stereo matching algorithm to
                     its adaptive version has potential advantages to achieve
                     the maximum quality (depth accuracy) in a \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Nejatollahi:2020:SFA,
  author          = {Hamid Nejatollahi and Felipe Valencia and Subhadeep
                     Banik and Francesco Regazzoni and Rosario Cammarota
                     and Nikil Dutt},
  title           = {Synthesis of Flexible Accelerators for Early Adoption of
                     Ring-{LWE} Post-quantum Cryptography},
  journal         = j-TECS,
  volume          = {19},
  number          = {2},
  articleno       = {11},
  pages           = {11:1--11:17},
  month           = mar,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3378164},
  url             = {https://dl.acm.org/doi/abs/10.1145/3378164},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {The advent of the quantum computer makes current
                     public-key infrastructure insecure. Cryptography
                     community is addressing this problem by designing,
                     efficiently implementing, and evaluating novel
                     public-key algorithms capable of withstanding quantum
                     \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Navarro:2020:MLM,
  author          = {Osvaldo Navarro and Jones Yudi and Javier Hoffmann and
                     Hector Gerardo Mu{\~n}oz Hernandez and Michael
                     H{\"u}bner},
  title           = {A Machine Learning Methodology for Cache Memory Design
                     Based on Dynamic Instructions},
  journal         = j-TECS,
  volume          = {19},
  number          = {2},
  articleno       = {12},
  pages           = {12:1--12:20},
  month           = mar,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3376920},
  url             = {https://dl.acm.org/doi/abs/10.1145/3376920},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Cache memories are an essential component of modern
                     processors and consume a large percentage of their power
                     consumption. Its efficacy depends heavily on the memory
                     demands of the software. Thus, finding the optimal cache
                     for a particular program is not \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Kindt:2020:EMB,
  author          = {Philipp H. Kindt and Daniel Yunge and Robert Diemer and
                     Samarjit Chakraborty},
  title           = {Energy Modeling for the {Bluetooth} Low Energy
                     Protocol},
  journal         = j-TECS,
  volume          = {19},
  number          = {2},
  articleno       = {13},
  pages           = {13:1--13:32},
  month           = mar,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3379339},
  url             = {https://dl.acm.org/doi/abs/10.1145/3379339},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Bluetooth Low Energy (BLE) is a wireless protocol
                     optimized for low-power communication. To design
                     energy-efficient devices, the protocol provides a number
                     of parameters that need to be optimized within an
                     energy, latency, and throughput design space. \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Luppold:2020:CWC,
  author          = {Arno Luppold and Dominic Oehlert and Heiko Falk},
  title           = {Compiling for the Worst Case: Memory Allocation for
                     Multi-task and Multi-core Hard Real-time Systems},
  journal         = j-TECS,
  volume          = {19},
  number          = {2},
  articleno       = {14},
  pages           = {14:1--14:26},
  month           = mar,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3381752},
  url             = {https://dl.acm.org/doi/abs/10.1145/3381752},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Modern embedded hard real-time systems feature multiple
                     tasks running on multiple processing cores.
                     Schedulability analysis of such systems is usually
                     performed on an abstract system level with each task
                     being represented as a black box with fixed \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Ahmad:2020:FFB,
  author          = {Afzal Ahmad and Muhammad Adeel Pasha},
  title           = {{FFConv}: an {FPGA}-based Accelerator for Fast
                     Convolution Layers in Convolutional Neural Networks},
  journal         = j-TECS,
  volume          = {19},
  number          = {2},
  articleno       = {15},
  pages           = {15:1--15:24},
  month           = mar,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3380548},
  url             = {https://dl.acm.org/doi/abs/10.1145/3380548},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Image classification is known to be one of the most
                     challenging problems in the domain of computer vision.
                     Significant research is being done on developing systems
                     and algorithms improving accuracy, performance, area,
                     and power consumption for related \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Mar 18 07:47:52 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Shukla:2020:TER,
  author          = {Sandeep K. Shukla},
  title           = {{TECS} Editorial: Rethinking and Re-evaluating in the
                     Time of Crisis},
  journal         = j-TECS,
  volume          = {19},
  number          = {3},
  articleno       = {16e},
  pages           = {16e:1--16e:3},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3395923},
  url             = {https://dl.acm.org/doi/abs/10.1145/3395923},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Ganapathy:2020:DDV,
  author          = {Sanjay Ganapathy and Swagath Venkataramani and Giridhur
                     Sriraman and Balaraman Ravindran and Anand
                     Raghunathan},
  title           = {{DyVEDeep}: Dynamic Variable Effort Deep Neural
                     Networks},
  journal         = j-TECS,
  volume          = {19},
  number          = {3},
  articleno       = {16},
  pages           = {16:1--16:24},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3372882},
  url             = {https://dl.acm.org/doi/abs/10.1145/3372882},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Deep Neural Networks (DNNs) have advanced the
                     state-of-the-art in a variety of machine learning tasks
                     and are deployed in increasing numbers of products and
                     services. However, the computational requirements of
                     training and evaluating large-scale DNNs \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Smeets:2020:ARS,
  author          = {Hugues Smeets and Matteo Ceriotti and Pedro Jos{\'e}
                     Marr{\'o}n},
  title           = {Adapting Recursive Sinusoidal Software Oscillators for
                     Low-power Fixed-point Processors},
  journal         = j-TECS,
  volume          = {19},
  number          = {3},
  articleno       = {17},
  pages           = {17:1--17:26},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3378559},
  url             = {https://dl.acm.org/doi/abs/10.1145/3378559},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {The growing field of the Internet of Things relies at
                     the bottom on components with very scarce computing
                     resources that currently do not allow complex processing
                     of sensed data. Any computation involving Fast Fourier
                     Transforms (FFT), Wavelet \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@article{Cheng:2020:DDT,
  author          = {Yuan Cheng and Guangya Li and Ngai Wong and Hai-Bao Chen
                     and Hao Yu},
  title           = {{DEEPEYE}: a Deeply Tensor-Compressed Neural Network for
                     Video Comprehension on Terminal Devices},
  journal         = j-TECS,
  volume          = {19},
  number          = {3},
  articleno       = {18},
  pages           = {18:1--18:25},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3381805},
  url             = {https://dl.acm.org/doi/abs/10.1145/3381805},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Video object detection and action recognition typically
                     require deep neural networks (DNNs) with huge number of
                     parameters. It is thereby challenging to develop a DNN
                     video comprehension unit in resource-constrained
                     terminal devices. In this article, we \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb
}

@Article{Aerabi:2020:DSE,
  author          = {Ehsan Aerabi and Milad Bohlouli and Mohammad Hasan
                     Ahmadi Livany and Mahdi Fazeli and Athanasios
                     Papadimitriou and David Hely},
  title           = {Design Space Exploration for Ultra-Low-Energy and
                     Secure {IoT MCUs}},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {19},
  number          = {3},
  articleno       = {19},
  pages           = {19:1--19:34},
  month           = jul,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3384446},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3384446},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {This article explores the design space of secure
                     communication in ultra-low-energy IoT devices based on
                     Micro-Controller Units (MCUs). It tries to identify,
                     benchmark, and compare security-related design choices
                     in a Commercial-Off-The-Shelf (COTS) \ldots{}},
  bibdate         = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Seo:2020:MMP,
  author          = {Hwajeong Seo and Kyuhwang An and Hyeokdong Kwon and
                     Zhi Hu},
  title           = {{Montgomery} Multiplication for Public Key
                     Cryptography on {MSP430X}},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {19},
  number          = {3},
  articleno       = {20},
  pages           = {20:1--20:15},
  month           = jul,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3387919},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3387919},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {For traditional public key cryptography and
                     post-quantum cryptography, such as elliptic curve
                     cryptography and supersingular isogeny key
                     encapsulation, modular multiplication is the most
                     performance-critical operation among basic arithmetic
                     of these cryptographic schemes. For this reason, the
                     execution timing of such cryptographic schemes, which
                     may highly determine the service availability for
                     low-end microprocessors (e.g., 8-bit AVR, 16-bit
                     MSP430X, and 32-bit ARM Cortex-M), mainly relies on the
                     efficiency of modular multiplication on target embedded
                     processors.

                     In this article, we present new optimal modular
                     multiplication techniques based on the interleaved
                     Montgomery multiplication on 16-bit MSP430X
                     microprocessors, where the multiplication part is
                     performed in a hardware multiplier and the reduction
                     part is performed in a basic arithmetic logic unit
                     (ALU) with the optimal modular multiplication routine,
                     respectively. This two-step approach is effective for
                     the special modulus of NIST curves, SM2 curves, and
                     supersingular isogeny key encapsulation. We further
                     optimized the Montgomery reduction by using techniques
                     for Montgomery-friendly prime. This technique
                     significantly reduces the number of partial products.
                     To demonstrate the superiority of the proposed
                     implementation of Montgomery multiplication, we applied
                     the proposed method to the NIST P-256 curve, of which
                     the implementation improves the previous modular
                     multiplication operation by 23.6\% on 16-bit MSP430X
                     microprocessors and to the SM2 curve as well (first
                     implementation on 16-bit MSP430X
                     microcontrollers).

                     Moreover, secure countermeasures against timing attack
                     and simple power analysis are also applied to the
                     scalar multiplication of NIST P-256 and SM2 curves,
                     which achieve the 8,582,338 clock cycles (0.53 seconds
                     at 16 MHz) and 10,027,086 clock cycles (0.62 seconds at
                     16 MHz), respectively. The proposed Montgomery
                     multiplication is a generic method that can be applied
                     to other cryptographic schemes and microprocessors with
                     minor modifications.},
  bibdate         = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ghosh:2020:RSD,
  author          = {Saurav Kumar Ghosh and Jaffer Sheriff R. C. and Vibhor
                     Jain and Soumyajit Dey},
  title           = {Reliable and Secure Design-Space-Exploration for
                     Cyber-Physical Systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {19},
  number          = {3},
  articleno       = {21},
  pages           = {21:1--21:29},
  month           = jul,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3387927},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3387927},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Given the widespread deployment of cyber-physical
                     systems and their safety-critical nature, reliability
                     and security guarantees offered by such systems are of
                     paramount importance. While the security of such
                     systems against sensor attacks have garnered \ldots{}},
  bibdate         = {Wed Jul 8 17:07:32 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhao:2020:NLD,
  author          = {Zhuoran Zhao and Kamyar Mirzazad Barijough and Andreas
                     Gerstlauer},
  title           = {Network-level Design Space Exploration of
                     Resource-constrained Networks-of-Systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {19},
  number          = {4},
  articleno       = {22},
  pages           = {22:1--22:26},
  month           = jul,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3387918},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3387918},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Driven by recent advances in networking and computing
                     technologies, distributed application scenarios are
                     increasingly deployed on resource-constrained
                     processing platforms. This includes networked embedded
                     and cyber-physical systems as well as edge \ldots{}},
  bibdate         = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Kadiyala:2020:LLA,
  author          = {Sai Praveen Kadiyala and Manaar Alam and Yash
                     Shrivastava and Sikhar Patranabis and Muhamed Fauzi Bin
                     Abbas and Arnab Kumar Biswas and Debdeep Mukhopadhyay
                     and Thambipillai Srikanthan},
  title           = {{LAMBDA: Lightweight Assessment of Malware for
                     emBeddeD} Architectures},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {19},
  number          = {4},
  articleno       = {23},
  pages           = {23:1--23:31},
  month           = jul,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3390855},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3390855},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Security is a critical aspect in many of the latest
                     embedded and IoT systems. Malware is one of the severe
                     threats of security for such devices. There have been
                     enormous efforts in malware detection and analysis;
                     however, occurrences of newer varieties \ldots{}},
  bibdate         = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Das:2020:ALS,
  author          = {Tuhin Subhra Das and Prasun Ghosal and Navonil
                     Chatterjee and Arnab Nath and Akash Banerjee and
                     Subhojyoti Khastagir},
  title           = {Application of Logical Sub-networking in
                     Congestion-aware Deadlock-free {SDmesh} Routing},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {19},
  number          = {4},
  articleno       = {24},
  pages           = {24:1--24:26},
  month           = jul,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3387928},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3387928},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {An adaptive routing helps in evading early network
                     saturation by steering data packets through the less
                     congested area at the oppressive loaded situation.
                     However, performances of adaptive routing are not
                     always promising under all circumstances. Say
                     \ldots{}},
  bibdate         = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Chuang:2020:DDB,
  author          = {Yi-Jing Chuang and Shuo-Han Chen and Yuan-Hao Chang
                     and Yu-Pei Liang and Hsin-Wen Wei and Wei-Kuan Shih},
  title           = {{DSTL}: a Demand-Based Shingled Translation Layer for
                     Enabling Adaptive Address Mapping on {SMR} Drives},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {19},
  number          = {4},
  articleno       = {25},
  pages           = {25:1--25:21},
  month           = jul,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3391892},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3391892},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Shingled magnetic recording (SMR) is regarded as a
                     promising technology for resolving the areal density
                     limitation of conventional magnetic recording hard disk
                     drives. Among different types of SMR drives,
                     drive-managed SMR (DM-SMR) requires no changes
                     \ldots{}},
  bibdate         = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Behrouzian:2020:FAR,
  author =       "Amir Behrouzian and Hadi Alizadeh Ara and Marc Geilen
                 and Dip Goswami and Twan Basten",
  title =        "Firmness Analysis of Real-time Tasks",
  journal =      j-TECS,
  volume =       "19",
  number =       "4",
  pages =        "26:1--26:24",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3398328",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jul 19 08:50:15 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3398328",
  abstract =     "$ (m, k) $-firm real-time tasks require meeting the
                 deadline of at least $m$ jobs out of any $k$
                 consecutive jobs. When compared to hard real-time
                 tasks, $ (m, k) $-firm tasks open up the possibility of
                 tighter resource-dimensioning in implementations.
                 Firmness \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liu:2020:AML,
  author =       "Ke Liu and Mengying Zhao and Lei Ju and Zhiping Jia
                 and Jingtong Hu and Chun Jason Xue",
  title =        "Applying Multiple Level Cell to Non-volatile {FPGAs}",
  journal =      j-TECS,
  volume =       "19",
  number =       "4",
  pages =        "27:1--27:22",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3400885",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jul 19 08:50:15 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3400885",
  abstract =     "Static random access memory- (SRAM) based field
                 programmable gate arrays (FPGAs) are currently facing
                 challenges of limited capacity and high leakage power.
                 To solve this problem, non-volatile memory (NVM) is
                 proposed as the alternative to build non-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sheikh:2020:EER,
  author          = {Saad Zia Sheikh and Muhammad Adeel Pasha},
  title           = {Energy-efficient Real-time Scheduling on Multicores: a
                     Novel Approach to Model Cache Contention},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {19},
  number          = {4},
  articleno       = {28},
  pages           = {28:1--28:25},
  month           = jul,
  year            = {2020},
  DOI             = {https://doi.org/10.1145/3399413},
  URL             = {https://dl.acm.org/doi/abs/10.1145/3399413},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {With the increasing demand for higher performance, the
                     adoption of multicores has been a major stepping stone
                     in the evolution of hard real-time systems. Though the
                     computational bandwidth is increased due to parallel
                     processing, the indispensable \ldots{}},
  bibdate         = {Sun Jul 19 08:50:15 MDT 2020},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hu:2020:GBT,
  author =       "Junyan Hu and Kenli Li and Chubo Liu and Keqin Li",
  title =        "Game-Based Task Offloading of Multiple Mobile Devices
                 with {QoS} in Mobile Edge Computing Systems of Limited
                 Computation Capacity",
  journal =      j-TECS,
  volume =       "19",
  number =       "4",
  pages =        "29:1--29:21",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3398038",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jul 19 08:50:15 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3398038",
  abstract =     "Mobile edge computing (MEC) is becoming a promising
                 paradigm of providing computing servers, like cloud
                 computing, to Edge node. Compared to cloud servers,
                 MECs are deployed closer to mobile devices (MDs) and
                 can provide high quality-of-service (QoS) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Saha:2021:MWR,
  author          = {Debasri Saha and Susmita Sur-Kolay},
  title           = {Minimization of {WCRT} with Recovery Assurance from
                     Hardware {Trojans} for Tasks on {FPGA}-based Cloud},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:25},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3409479},
  URL             = {https://dl.acm.org/doi/10.1145/3409479},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Dynamic partial reconfiguration (DPR) enabled
                     FPGA-based Cloud architecture acts as a flexible and
                     efficient shared environment to facilitates application
                     support to users' request at low cost. While on one
                     hand we need to handle a variety of tasks, \ldots{}},
  bibdate         = {Sat Jan 16 06:52:20 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Srinivasavarma:2021:TBC,
  author          = {Vegesna S. M. Srinivasavarma and Shiv Vidhyut and Noor
                     Mahammad S.},
  title           = {A {TCAM}-based Caching Architecture Framework for
                     Packet Classification},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:19},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3409109},
  URL             = {https://dl.acm.org/doi/10.1145/3409109},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Packet Classification is the enabling function for
                     performing many networking applications like Integrated
                     Services, Differentiated Services, Access
                     Control/Firewalls, and Intrusion Detection. To cope
                     with high-speed links and ever-increasing bandwidth
                     \ldots{}},
  bibdate         = {Sat Jan 16 06:52:20 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Pazzaglia:2021:GWH,
  author          = {Paolo Pazzaglia and Youcheng Sun and Marco {Di
                     Natale}},
  title           = {Generalized Weakly Hard Schedulability Analysis for
                     Real-Time Periodic Tasks},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:26},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3404888},
  URL             = {https://dl.acm.org/doi/10.1145/3404888},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {The weakly hard real-time model is an abstraction for
                     applications, including control systems, that can
                     tolerate occasional deadline misses, but can also be
                     compromised if a sufficiently high number of late
                     terminations occur in a given time window. The
                     \ldots{}},
  bibdate         = {Sat Jan 16 06:52:20 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Paul:2021:ATA,
  author          = {Suraj Paul and Navonil Chatterjee and Prasun Ghosal
                     and Jean-Philippe Diguet},
  title           = {Adaptive Task Allocation and Scheduling on {NoC}-based
                     Multicore Platforms with Multitasking Processors},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {1},
  articleno       = {4},
  pages           = {4:1--4:26},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3408324},
  URL             = {https://dl.acm.org/doi/10.1145/3408324},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {The application workloads in modern multicore
                     platforms are becoming increasingly dynamic. It becomes
                     challenging when multiple applications need to be
                     executed in parallel in such systems. Mapping and
                     scheduling of these applications are critical for
                     \ldots{}},
  bibdate         = {Sat Jan 16 06:52:20 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Agarwal:2021:IPH,
  author          = {Sukarn Agarwal and Hemangee K. Kapoor},
  title           = {Improving the Performance of Hybrid Caches Using
                     Partitioned Victim Caching},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {1},
  articleno       = {5},
  pages           = {5:1--5:27},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3411368},
  URL             = {https://dl.acm.org/doi/10.1145/3411368},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Non-Volatile Memory technologies are coming as a
                     viable option on account of the high density and
                     low-leakage power over the conventional SRAM
                     counterpart. However, the increased write latency
                     reduces their chances as a substitute for SRAM. To
                     attenuate \ldots{}},
  bibdate         = {Sat Jan 16 06:52:20 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{He:2021:GCF,
  author          = {Jiaji He and Haocheng Ma and Yanjiang Liu and Yiqiang
                     Zhao},
  title           = {Golden Chip-Free {Trojan} Detection Leveraging {Trojan
                     Trigger}'s Side-Channel Fingerprinting},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {1},
  articleno       = {6},
  pages           = {6:1--6:18},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3419105},
  URL             = {https://dl.acm.org/doi/10.1145/3419105},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Hardware Trojans (HTs) have become a major threat for
                     the integrated circuit industry and supply chain and
                     have motivated numerous developments of HT detection
                     schemes. Although the side-channel HT detection
                     approach is among the most promising \ldots{}},
  bibdate         = {Sat Jan 16 06:52:20 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ivanov:2021:VSA,
  author          = {Radoslav Ivanov and Taylor J. Carpenter and James
                     Weimer and Rajeev Alur and George J. Pappas and Insup
                     Lee},
  title           = {Verifying the Safety of Autonomous Systems with Neural
                     Network Controllers},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {1},
  articleno       = {7},
  pages           = {7:1--7:26},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3419742},
  URL             = {https://dl.acm.org/doi/10.1145/3419742},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {This article addresses the problem of verifying the
                     safety of autonomous systems with neural network (NN)
                     controllers. We focus on NNs with sigmoid/tanh
                     activations and use the fact that the sigmoid/tanh is
                     the solution to a quadratic differential \ldots{}},
  bibdate         = {Sat Jan 16 06:52:20 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ibrahim:2021:MFU,
  author          = {Omar Adel Ibrahim and Savio Sciancalepore and Gabriele
                     Oligeri and Roberto {Di Pietro}},
  title           = {{MAGNETO}: Fingerprinting {USB} Flash Drives via
                     Unintentional Magnetic Emissions},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {1},
  articleno       = {8},
  pages           = {8:1--8:26},
  month           = jan,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3422308},
  URL             = {https://dl.acm.org/doi/10.1145/3422308},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Universal Serial Bus (USB) Flash Drives are nowadays
                     one of the most convenient and diffused means to
                     transfer files, especially when no Internet connection
                     is available. However, USB flash drives are also one of
                     the most common attack vectors used to \ldots{}},
  bibdate         = {Sat Jan 16 06:52:20 MST 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Fard:2021:APP,
  author          = {Mahdi Mohammadpour Fard and Mahmood Hasanloo and Mehdi
                     Kargahi},
  title           = {Analytical Program Power Characterization for Battery
                     Depletion-time Estimation},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {2},
  articleno       = {9},
  pages           = {9:1--9:9},
  month           = mar,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3421511},
  URL             = {https://dl.acm.org/doi/10.1145/3421511},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {Appropriate battery selection is a major design
                     decision regarding the fast growth of battery-operated
                     devices like space rovers, wireless sensor network
                     nodes, rescue robots, and so on. Many such systems are
                     mission critical, where estimation of the \ldots{}},
  bibdate         = {Sat Mar 20 17:37:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ungureanu:2021:FAT,
  author          = {George Ungureanu and Jos{\'e} Edil {Guimar{\~a}es De
                     Medeiros} and Timmy Sundstr{\"o}m and Ingemar
                     S{\"o}derquist and Anders {\AA}hlander and Ingo
                     Sander},
  title           = {{ForSyDe-Atom}: Taming Complexity in Cyber Physical
                     System Design with Layers},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {2},
  articleno       = {10},
  pages           = {10:1--10:27},
  month           = mar,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3424667},
  URL             = {https://dl.acm.org/doi/10.1145/3424667},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {We present ForSyDe-Atom, a formal framework intended
                     as an entry point for disciplined design of complex
                     cyber-physical systems. This framework provides a set
                     of rules for combining several domain-specific
                     languages as structured, enclosing layers to \ldots{}},
  bibdate         = {Sat Mar 20 17:37:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Li:2021:HCO,
  author          = {Keqin Li},
  title           = {Heuristic Computation Offloading Algorithms for Mobile
                     Users in Fog Computing},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  journal-URL     = {https://dl.acm.org/loi/tecs},
  volume          = {20},
  number          = {2},
  articleno       = {11},
  pages           = {11:1--11:28},
  month           = mar,
  year            = {2021},
  DOI             = {https://doi.org/10.1145/3426852},
  URL             = {https://dl.acm.org/doi/10.1145/3426852},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  abstract        = {The investigation in this article makes the following
                     important contributions to combinatorial optimization
                     of computation offloading in fog computing. First, we
                     rigorously define the two problems of optimal
                     computation offloading with energy constraint
                     \ldots{}},
  bibdate         = {Sat Mar 20 17:37:34 MDT 2021},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Dharmaraj:2021:OSP,
  author =       "Celia Dharmaraj and Vinita Vasudevan and Nitin
                 Chandrachoodan",
  title =        "Optimization of Signal Processing Applications Using
                 Parameterized Error Models for Approximate Adders",
  journal =      j-TECS,
  volume =       "20",
  number =       "2",
  pages =        "12:1--12:25",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3430509",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 20 17:37:34 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3430509",
  abstract =     "Approximate circuit design has gained significance in
                 recent years targeting error-tolerant applications. In
                 the literature, there have been several attempts at
                 optimizing the number of approximate bits of each
                 approximate adder in a system for a given \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Atoofian:2021:REG,
  author =       "Ehsan Atoofian and Zayan Shaikh and Ali Jannesari",
  title =        "Reducing Energy in {GPGPUs} through Approximate
                 Trivial Bypassing",
  journal =      j-TECS,
  volume =       "20",
  number =       "2",
  pages =        "13:1--13:27",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3429440",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 20 17:37:34 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3429440",
  abstract =     "General-purpose computing using graphics processing
                 units (GPGPUs) is an attractive option for acceleration
                 of applications with massively data-parallel tasks.
                 While performance of modern GPGPUs is increasing
                 rapidly, the power consumption of these \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Carreon:2021:PET,
  author =       "Nadir A. Carreon and Sixing Lu and Roman Lysecky",
  title =        "Probabilistic Estimation of Threat Intrusion in
                 Embedded Systems for Runtime Detection",
  journal =      j-TECS,
  volume =       "20",
  number =       "2",
  pages =        "14:1--14:27",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3432590",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 20 17:37:34 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3432590",
  abstract =     "With billions of networked connected embedded systems,
                 the security historically provided by the isolation of
                 embedded systems is no longer sufficient. Millions of
                 new malware are created every month and zero-day
                 attacks are becoming an increasing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Akbari:2021:FHA,
  author =       "Ali Akbari and Jonathan Martinez and Roozbeh Jafari",
  title =        "Facilitating Human Activity Data Annotation via
                 Context-Aware Change Detection on Smartwatches",
  journal =      j-TECS,
  volume =       "20",
  number =       "2",
  pages =        "15:1--15:20",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3431503",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 20 17:37:34 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3431503",
  abstract =     "Annotating activities of daily living (ADL) is vital
                 for developing machine learning models for activity
                 recognition. In addition, it is critical for
                 self-reporting purposes such as in assisted living
                 where the users are asked to log their ADLs. However,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ko:2021:LCL,
  author =       "Yousun Ko and Alex Chadwick and Daniel Bates and
                 Robert Mullins",
  title =        "Lane Compression: a Lightweight Lossless Compression
                 Method for Machine Learning on Embedded Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "2",
  pages =        "16:1--16:26",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3431815",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 20 17:37:34 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3431815",
  abstract =     "This article presents Lane Compression, a lightweight
                 lossless compression technique for machine learning
                 that is based on a detailed study of the statistical
                 properties of machine learning data. The proposed
                 technique profiles machine learning data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sepulveda:2021:BCA,
  author =       "Johanna Sep{\'u}lveda and Mathieu Gross and Andreas
                 Zankl and Georg Sigl",
  title =        "Beyond Cache Attacks: Exploiting the Bus-based
                 Communication Structure for Powerful On-Chip
                 Microarchitectural Attacks",
  journal =      j-TECS,
  volume =       "20",
  number =       "2",
  pages =        "17:1--17:23",
  month =        mar,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3433653",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 20 17:37:34 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3433653",
  abstract =     "System-on-Chips (SoCs) are a key enabling technology
                 for the Internet-of-Things (IoT), a hyper-connected
                 world where on- and inter-chip communication is
                 ubiquitous. SoCs usually integrate cryptographic
                 hardware cores for confidentiality and authentication
                 services. However, these components are prone to
                 implementation attacks. During the operation of a
                 cryptographic core, the secret key may passively be
                 inferred through cache observations. Access-driven
                 attacks exploiting these observations are therefore a
                 vital threat to SoCs operating in IoT environments.
                 Previous works have shown the feasibility of these
                 attacks in the SoC context. Yet, the SoC communication
                 structure can be used to further improve access-based
                 cache attacks. The communication attacks are not as
                 well-understood as other micro-architectural attacks.
                 It is important to raise the awareness of SoC designers
                 of such a threat. To this end, we present four
                 contributions. First, we demonstrate an improved
                 Prime+Probe attack on four different AES-128
                 implementations (original transformation tables,
                 T0-Only, T2KB, and S-Box). As a novelty, this attack
                 exploits the collisions of the bus-based SoC
                 communication to further increase its efficiency.
                 Second, we explore the impact of preloading on the
                 efficiency of our communication-optimized attack.
                 Third, we integrate three countermeasures (shuffling,
                 mini-tables, and Time-Division Multiple Access (TDMA)
                 bus arbitration) and evaluate their impact on the
                 attack. Although shuffling and mini-tables
                 countermeasures were proposed in previous work, their
                 application as countermeasures against the bus-based
                 attack was not studied before. In addition, TDMA as a
                 countermeasure for bus-based attacks is an original
                 contribution of this work. Fourth, we further discuss
                 the implications of our work in the SoC design and its
                 perspective with the new cryptographic primitives
                 proposed in the ongoing National Institute of Standards
                 and Technology Lightweight Cryptography competition.
                 The results show that our improved
                 communication-optimized attack is efficient, speeding
                 up full key recovery by up to 400 times when compared
                 to the traditional Prime+Probe technique. Moreover, the
                 protection techniques are feasible and effectively
                 mitigate the proposed improved attack.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mitra:2021:ERA,
  author =       "Tulika Mitra",
  title =        "Editorial: Reimagining {{\booktitle{ACM Transactions
                 on Embedded Computing Systems (TECS)}}}",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "18e:1--18e:3",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3450438",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3450438",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18e",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Langerman:2021:RTH,
  author =       "David Langerman and Alan George",
  title =        "Real-time, High-resolution Depth Upsampling on
                 Embedded Accelerators",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "18:1--18:22",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3436878",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3436878",
  abstract =     "High-resolution, low-latency apps in computer vision
                 are ubiquitous in today's world of mixed-reality
                 devices. These innovations provide a platform that can
                 leverage the improving technology of depth sensors and
                 embedded accelerators to enable higher-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Seo:2021:SBA,
  author =       "Hwajeong Seo and Pakize Sanal and Reza Azarderakhsh",
  title =        "{SIKE} in 32-bit {ARM} Processors Based on Redundant
                 Number System for {NIST} Level-{II}",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "19:1--19:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3439733",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3439733",
  abstract =     "We present an optimized implementation of the
                 post-quantum Supersingular Isogeny Key Encapsulation
                 (SIKE) for 32-bit ARMv7-A processors supporting NEON
                 engine (i.e., SIMD instruction). Unlike previous SIKE
                 implementations, finite field arithmetic is efficiently
                 implemented in a redundant representation, which avoids
                 carry propagation and pipeline stall. Furthermore, we
                 adopted several state-of-the-art engineering techniques
                 as well as hand-crafted assembly implementation for
                 high performance. Optimized implementations are ported
                 to Microsoft SIKE library written in ``a non-redundant
                 representation'' and evaluated in high-end 32-bit
                 ARMv7-A processors, such as ARM Cortex-A5, A7, and A15.
                 A full key-exchange execution of SIKEp503 is performed
                 in about 109 million cycles on ARM Cortex-A15
                 processors (i.e., 54.5 ms @2.0 GHz), which is about $
                 1.58 \times $ faster than previous state-of-the-art
                 work presented in CHES 18.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ma:2021:CSA,
  author =       "Mingze Ma and Rizos Sakellariou",
  title =        "Code-size-aware Scheduling of Synchronous Dataflow
                 Graphs on Multicore Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "20:1--20:24",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3440034",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3440034",
  abstract =     "Synchronous dataflow graphs are widely used to model
                 digital signal processing and multimedia applications.
                 Self-timed execution is an efficient methodology for
                 the analysis and scheduling of synchronous dataflow
                 graphs. In this article, we propose a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yuan:2021:CCB,
  author =       "Bo Yuan and Xiaofen Lu and Ke Tang and Xin Yao",
  title =        "Cooperative Coevolution-based Design Space Exploration
                 for Multi-mode Dataflow Mapping",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "21:1--21:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3440246",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3440246",
  abstract =     "Some signal processing and multimedia applications can
                 be specified by synchronous dataflow (SDF) models. The
                 problem of SDF mapping to a given set of heterogeneous
                 processors has been known to be NP-hard and widely
                 studied in the design automation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Leon:2021:IPP,
  author =       "Vasileios Leon and George Lentaris and Evangelos
                 Petrongonas and Dimitrios Soudris and Gianluca Furano
                 and Antonis Tavoularis and David Moloney",
  title =        "Improving Performance-Power-Programmability in Space
                 Avionics with Edge Devices: {VBN} on Myriad2 {SoC}",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "22:1--22:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3440885",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3440885",
  abstract =     "The advent of powerful edge devices and AI algorithms
                 has already revolutionized many terrestrial
                 applications; however, for both technical and
                 historical reasons, the space industry is still
                 striving to adopt these key enabling technologies in
                 new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shamsa:2021:UUB,
  author =       "Elham Shamsa and Alma Pr{\"o}bstl and Nima TaheriNejad
                 and Anil Kanduri and Samarjit Chakraborty and Amir M.
                 Rahmani and Pasi Liljeberg",
  title =        "{UBAR}: User- and Battery-aware Resource Management
                 for Smartphones",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "23:1--23:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441644",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441644",
  abstract =     "Smartphone users require high Battery Cycle Life (BCL)
                 and high Quality of Experience (QoE) during their
                 usage. These two objectives can be conflicting based on
                 the user preference at run-time. Finding the best
                 trade-off between QoE and BCL requires an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rottleuthner:2021:SYP,
  author =       "Michel Rottleuthner and Thomas C. Schmidt and Matthias
                 W{\"a}hlisch",
  title =        "Sense Your Power: The {ECO} Approach to Energy
                 Awareness for {IoT} Devices",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "24:1--24:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441643",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441643",
  abstract =     "Energy-constrained sensor nodes can adaptively
                 optimize their energy consumption if a continuous
                 measurement is provided. This is of particular
                 importance in scenarios of high dynamics such as with
                 energy harvesting. Still, self-measuring of power
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Marshall:2021:PCP,
  author =       "James Marshall and Robert Gifford and Gedare Bloom and
                 Gabriel Parmer and Rahul Simha",
  title =        "Precise Cache Profiling for Studying Radiation
                 Effects",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "25:1--25:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442339",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442339",
  abstract =     "Increased access to space has led to an increase in
                 the usage of commodity processors in radiation
                 environments. These processors are vulnerable to
                 transient faults such as single event upsets that may
                 cause bit-flips in processor components. Caches in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Poudel:2021:MFU,
  author =       "Prawar Poudel and Biswajit Ray and Aleksandar
                 Milenkovic",
  title =        "Microcontroller Fingerprinting Using Partially Erased
                 {NOR} Flash Memory Cells",
  journal =      j-TECS,
  volume =       "20",
  number =       "3",
  pages =        "26:1--26:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3448271",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Apr 24 07:51:05 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3448271",
  abstract =     "Electronic device fingerprints, unique bit vectors
                 extracted from device's physical properties, are used
                 to differentiate between instances of functionally
                 identical devices. This article introduces a new
                 technique that extracts fingerprints from unique
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Girault:2021:ISI,
  author =       "Alain Girault and Reinhard von Hanxleden",
  title =        "Introduction to the Special Issue on {Specification
                 and Design Languages (FDL 2019)}",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "27:1--27:3",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3458748",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3458748",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shi:2021:TGH,
  author =       "Zhendong Shi and Haocheng Ma and Qizhi Zhang and
                 Yanjiang Liu and Yiqiang Zhao and Jiaji He",
  title =        "Test Generation for Hardware {Trojan} Detection Using
                 Correlation Analysis and Genetic Algorithm",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "28:1--28:20",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3446837",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3446837",
  abstract =     "Hardware Trojan (HT) is a major threat to the security
                 of integrated circuits (ICs). Among various HT
                 detection approaches, side channel analysis (SCA)-based
                 methods have been extensively studied. SCA-based
                 methods try to detect HTs by comparing side \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jackson:2021:EES,
  author =       "Riley Jackson and Jonathan Gresl and Ramon Lawrence",
  title =        "Efficient External Sorting for Memory-Constrained
                 Embedded Devices with Flash Memory",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "29:1--29:21",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3446976",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3446976",
  abstract =     "Embedded devices are ubiquitous in areas of industrial
                 and environmental monitoring, health and safety, and
                 consumer appliances. A common use case is data
                 collection, processing, and performing actions based on
                 data analysis. Although many Internet of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rahman:2021:LTW,
  author =       "Mahbubur Rahman and Dali Ismail and Venkata P.
                 Modekurthy and Abusayeed Saifullah",
  title =        "{LPWAN} in the {TV} White Spaces: a Practical
                 Implementation and Deployment Experiences",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "30:1--30:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3447877",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3447877",
  abstract =     "Low-Power Wide-Area Network (LPWAN) is an enabling
                 Internet-of-Things technology that supports long-range,
                 low-power, and low-cost connectivity to numerous
                 devices. To avoid the crowd in the limited ISM band
                 (where most LPWANs operate) and cost of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bombieri:2021:SIS,
  author =       "Nicola Bombieri and Silvia Scaffeo and Antonio
                 Mastrandrea and Simone Caligola and Tommaso Carlucci
                 and Franco Fummi and Carlo Laudanna and Gabriela
                 Constantin and Rosalba Giugno",
  title =        "{SystemC} Implementation of Stochastic {Petri} Nets
                 for Simulation and Parameterization of Biological
                 Networks",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "31:1--31:20",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3427091",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3427091",
  abstract =     "Model development and simulation of biological
                 networks is recognized as a key task in Systems
                 Biology. Integrated with in vitro and in vivo
                 experimental data, network simulation allows for the
                 discovery of the dynamics that regulate biological
                 systems. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gressl:2021:DSE,
  author =       "Lukas Gressl and Christian Steger and Ulrich Neffe",
  title =        "Design Space Exploration for Secure {IoT} Devices and
                 Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "32:1--32:24",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3430372",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3430372",
  abstract =     "With the advent of the Internet of Things (IoT) and
                 Cyber-Physical Systems (CPS), embedded devices have
                 been gaining importance in our daily lives, as well as
                 industrial processes. Independent of their usage, be it
                 within an IoT system or a CPS, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bruns:2021:TMC,
  author =       "Friederike Bruns and Irune Yarza and Philipp
                 Ittershagen and Kim Gr{\"u}ttner",
  title =        "Time Measurement and Control Blocks for Bare-Metal
                 {C++} Applications",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "34:1--34:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434401",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434401",
  abstract =     "Precisely timed execution of resource constrained
                 bare-metal applications is difficult, because the
                 embedded software developer usually has to implement
                 and check the timeliness of the executed application
                 through manual interaction with timers or \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dupont:2021:EBH,
  author =       "Guillaume Dupont and Yamine Ait-Ameur and Neeraj Kumar
                 Singh and Marc Pantel",
  title =        "{Event-B} Hybridation: a Proof and Refinement-based
                 Framework for Modelling Hybrid Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "35:1--35:37",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3448270",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3448270",
  abstract =     "Hybrid systems are complex systems where a software
                 controller interacts with a physical environment,
                 usually named a plant, through sensors and actuators.
                 The specification and design of such systems usually
                 rely on the description of both continuous \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Schulz-Rosengarten:2021:TOO,
  author =       "Alexander Schulz-Rosengarten and Steven Smyth and
                 Michael Mendler",
  title =        "Toward Object-oriented Modeling in {SCCharts}",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "37:1--37:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3453482",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3453482",
  abstract =     "Object orientation is a powerful and widely used
                 paradigm for abstraction and structuring in
                 programming. Many languages are designed with this
                 principle or support different degrees of object
                 orientation. In synchronous languages, originally
                 developed \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Broman:2021:IPM,
  author =       "David Broman",
  title =        "Interactive Programmatic Modeling",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "33:1--33:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3431387",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3431387",
  abstract =     "Modeling and computational analyses are fundamental
                 activities within science and engineering. Analysis
                 activities can take various forms, such as simulation
                 of executable models, formal verification of model
                 properties, or inference of hidden model \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lohstroh:2021:TLF,
  author =       "Marten Lohstroh and Christian Menard and Soroush
                 Bateni and Edward A. Lee",
  title =        "Toward a Lingua Franca for Deterministic Concurrent
                 Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "4",
  pages =        "36:1--36:27",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3448128",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sun Jun 6 07:03:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3448128",
  abstract =     "Many programming languages and programming frameworks
                 focus on parallel and distributed computing. Several
                 frameworks are based on actors, which provide a more
                 disciplined model for concurrency than threads. The
                 interactions between actors, however, if \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shrivastava:2020:ISIa,
  author =       "Aviral Shrivastava and Jian-Jia Chen and Youtao
                 Zhang",
  title =        "Introduction to the Special Issue on Languages,
                 Compilers, Tools, and Theory of Embedded Systems: {Part
                 1}",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "30:1--30:3",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3417732",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3417732",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2020:DAR,
  author =       "Wanli Chang and Ran Wei and Shuai Zhao and Andy
                 Wellings and Jim Woodcock and Alan Burns",
  title =        "Development Automation of Real-Time {Java}:
                 Model-Driven Transformation and Synthesis",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "31:1--31:26",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391897",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391897",
  abstract =     "Many applications in emerging scenarios, such as
                 autonomous vehicles, intelligent robots, and industrial
                 automation, are safety-critical with strict timing
                 requirements. However, the development of real-time
                 systems is error prone and highly dependent \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Venkataramani:2020:SSD,
  author =       "Vanchinathan Venkataramani and Aditi Kulkarni and
                 Tulika Mitra and Li-Shiuan Peh",
  title =        "{SPECTRUM}: a Software-defined Predictable Many-core
                 Architecture for {LTE\slash 5G} Baseband Processing",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "32:1--32:28",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3400032",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3400032",
  abstract =     "Wireless communication standards such as Long-term
                 Evolution (LTE) are rapidly changing to support the
                 high data-rate of wireless devices. The physical layer
                 baseband processing has strict real-time deadlines,
                 especially in the next-generation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Reghenzani:2020:DUP,
  author =       "Federico Reghenzani and Luca Santinelli and William
                 Fornaciari",
  title =        "Dealing with Uncertainty in {pWCET} Estimations",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "33:1--33:23",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3396234",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3396234",
  abstract =     "The problem of estimating a tight and safe Worst-Case
                 Execution Time (WCET), needed for certification in
                 safety-critical environment, is a challenging problem
                 for modern embedded systems. A possible solution
                 proposed in past years is to exploit \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Calderon:2020:GUE,
  author =       "Alejandro J. Calder{\'o}n and Leonidas Kosmidis and
                 Carlos F. Nicol{\'a}s and Francisco J. Cazorla and Peio
                 Onaindia",
  title =        "{GMAI}: Understanding and Exploiting the Internals of
                 {GPU} Resource Allocation in Critical Systems",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "34:1--34:23",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391896",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391896",
  abstract =     "Critical real-time systems require strict resource
                 provisioning in terms of memory and timing. The
                 constant need for higher performance in these systems
                 has led industry to recently include GPUs. However, GPU
                 software ecosystems are by their nature \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2020:CTC,
  author =       "Chundong Wang and Sudipta Chattopadhyay and Gunavaran
                 Brihadiswarn",
  title =        "{Crab-tree}: a Crash Recoverable {B+}-tree Variant for
                 Persistent Memory with {ARMv8} Architecture",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "35:1--35:26",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3396236",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3396236",
  abstract =     "In recent years, the next-generation non-volatile
                 memory (NVM) technologies have emerged with DRAM-like
                 byte addressability and disk-like durability. Computer
                 architects have proposed to use them to build
                 persistent memory that blurs the conventional
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bresch:2020:TXP,
  author =       "Cyril Bresch and David H{\'e}ly and Roman Lysecky and
                 St{\'e}phanie Chollet and Ioannis Parissis",
  title =        "{TrustFlow-X}: a Practical Framework for Fine-grained
                 Control-flow Integrity in Critical Systems",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "36:1--36:26",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3398327",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3398327",
  abstract =     "This article addresses the challenges of memory safety
                 in life-critical medical devices. Since the last
                 decade, healthcare manufacturers have embraced the
                 Internet of Things, pushing technological innovations
                 to increase market share. Medical devices, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lukyanov:2020:FVS,
  author =       "Georgy Lukyanov and Andrey Mokhov and Jakob Lechner",
  title =        "Formal Verification of Spacecraft Control Programs",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "37:1--37:18",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391900",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391900",
  abstract =     "Verification of correctness of control programs is an
                 essential task in the development of space electronics;
                 it is difficult and typically outweighs design and
                 programming tasks in terms of development hours. This
                 article presents a verification \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kadiyala:2020:HPC,
  author =       "Sai Praveen Kadiyala and Pranav Jadhav and Siew-Kei
                 Lam and Thambipillai Srikanthan",
  title =        "Hardware Performance Counter-Based Fine-Grained
                 Malware Detection",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "38:1--38:17",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3403943",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3403943",
  abstract =     "Detection of malicious programs using hardware-based
                 features has gained prominence recently. The
                 tamper-resistant hardware metrics prove to be a better
                 security feature than the high-level software metrics,
                 which can be easily obfuscated. Hardware \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Stitt:2020:PAI,
  author =       "Greg Stitt and David Campbell",
  title =        "{PANDORA}: an Architecture-Independent Parallelizing
                 Approximation-Discovery Framework",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "39:1--39:17",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391899",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391899",
  abstract =     "In this article, we introduce a parallelizing
                 approximation-discovery framework, PANDORA, for
                 automatically discovering application- and
                 architecture-specialized approximations of provided
                 code. PANDORA complements existing compilers and
                 runtime \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Atoofian:2020:ACG,
  author =       "Ehsan Atoofian",
  title =        "Approximate Cache in {GPGPUs}",
  journal =      j-TECS,
  volume =       "19",
  number =       "5",
  pages =        "40:1--40:22",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3407904",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:34:59 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3407904",
  abstract =     "There is a growing number of application domains
                 ranging from multimedia to machine learning where a
                 certain level of inexactness can be tolerated. For
                 these applications, approximate computing is an
                 effective technique that trades off some loss in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shrivastava:2020:ISIb,
  author =       "Aviral Shrivastava and Jian-Jia Chen and Youtao
                 Zhang",
  title =        "Introduction to the Special Issue on Languages,
                 Compilers, Tools, and Theory of Embedded Systems: {Part
                 2}",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "41:1--41:2",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3417734",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3417734",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hsiao:2020:CHC,
  author =       "Luke Hsiao and Sen Wu and Nicholas Chiang and
                 Christopher R{\'e} and Philip Levis",
  title =        "Creating Hardware Component Knowledge Bases with
                 Training Data Generation and Multi-task Learning",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "42:1--42:26",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391906",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391906",
  abstract =     "Hardware component databases are vital resources in
                 designing embedded systems. Since creating these
                 databases requires hundreds of thousands of hours of
                 manual data entry, they are proprietary, limited in the
                 data they provide, and have random data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Samragh:2020:ERB,
  author =       "Mohammad Samragh and Mojan Javaheripi and Farinaz
                 Koushanfar",
  title =        "{EncoDeep}: Realizing Bit-flexible Encoding for Deep
                 Neural Networks",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "43:1--43:29",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391901",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391901",
  abstract =     "This article proposes EncoDeep, an end-to-end
                 framework that facilitates encoding, bitwidth
                 customization, fine-tuning, and implementation of
                 neural networks on FPGA platforms. EncoDeep
                 incorporates nonlinear encoding to the computation flow
                 of neural \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Khan:2020:OTC,
  author =       "Asif Ali Khan and Norman A. Rink and Fazal Hameed and
                 Jeronimo Castrillon",
  title =        "Optimizing Tensor Contractions for Embedded Devices
                 with Racetrack and {DRAM} Memories",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "44:1--44:26",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3396235",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3396235",
  abstract =     "Tensor contraction is a fundamental operation in many
                 algorithms with a plethora of applications ranging from
                 quantum chemistry over fluid dynamics and image
                 processing to machine learning. The performance of
                 tensor computations critically depends on \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ahmed:2020:FEE,
  author =       "Saad Ahmed and Naveed Anwar Bhatti and Muhammad Hamad
                 Alizai and Junaid Haroon Siddiqui and Luca Mottola",
  title =        "Fast and Energy-Efficient State Checkpointing for
                 Intermittent Computing",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "45:1--45:27",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391903",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391903",
  abstract =     "Intermittently powered embedded devices ensure forward
                 progress of programs through state checkpointing in
                 non-volatile memory. Checkpointing is, however,
                 expensive in energy and adds to the execution times. To
                 minimize this overhead, we present DICE, a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2020:DIA,
  author =       "Xinyi Li and Lei Zhang and Xipeng Shen",
  title =        "{DIAC}: an Inter-app Conflicts Detector for Open {IoT}
                 Systems",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "46:1--46:25",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391895",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391895",
  abstract =     "This article tackles the problem of detecting and
                 solving potential conflicts among independently
                 developed apps that are to be installed into an open
                 Internet-of-Things (IoT) environment. It provides a new
                 set of definitions and categorizations of the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ahmed:2020:DEC,
  author =       "Saad Ahmed and Muhammad Nawaz and Abu Bakar and Naveed
                 Anwar Bhatti and Muhammad Hamad Alizai and Junaid
                 Haroon Siddiqui and Luca Mottola",
  title =        "Demystifying Energy Consumption Dynamics in
                 Transiently powered Computers",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "47:1--47:25",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391893",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391893",
  abstract =     "Transiently powered computers (TPCs) form the
                 foundation of the battery-less Internet of Things,
                 using energy harvesting and small capacitors to power
                 their operation. This kind of power supply is
                 characterized by extreme variations in supply voltage,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wade:2020:EIP,
  author =       "April W. Wade and Prasad A. Kulkarni and Michael R.
                 Jantz",
  title =        "Exploring Impact of Profile Data on Code Quality in
                 the {HotSpot JVM}",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "48:1--48:26",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391894",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/java2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391894",
  abstract =     "Managed language virtual machines (VM) rely on dynamic
                 or just-in-time (JIT) compilation to generate optimized
                 native code at run-time to deliver high execution
                 performance. Many VMs and JIT compilers collect profile
                 data at run-time to enable profile-guided \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Reissmann:2020:RIR,
  author =       "Nico Reissmann and Jan Christian Meyer and Helge
                 Bahmann and Magnus Sj{\"a}lander",
  title =        "{RVSDG}: an Intermediate Representation for Optimizing
                 Compilers",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "49:1--49:28",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391902",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391902",
  abstract =     "Intermediate Representations (IRs) are central to
                 optimizing compilers as the way the program is
                 represented may enhance or limit analyses and
                 transformations. Suitable IRs focus on exposing the
                 most relevant information and establish invariants that
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Latifis:2020:RMC,
  author =       "Ioannis Latifis and Karthick Parashar and Grigoris
                 Dimitroulakos and Hans Cappelle and Christakis Lezos
                 and Konstantinos Masselos and Francky Catthoor",
  title =        "A Retargetable {MATLAB-to-C} Compiler Exploiting
                 Custom Instructions and Data Parallelism",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "50:1--50:27",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3391898",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/matlab.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3391898",
  abstract =     "This article presents a MATLAB-to-C compiler that
                 exploits custom instructions present in
                 state-of-the-art processor architectures and supports
                 semi-automatic vectorization. A parameterized processor
                 model is used to describe the target instruction set
                 architecture to achieve user-friendly retargetability.
                 Custom instructions are represented via specialized
                 intrinsic functions in the generated code, which can
                 then be used as input to any C/C++ compiler supporting
                 the target processor. In addition, the compiler
                 supports the generation of data parallel\slash
                 vectorized code through the introduction of data
                 packing\slash unpacking statements. The compiler has
                 been used for code generation targeting ARM and x86
                 architectures for several benchmarks. The vectorized
                 code generated by the compiler achieves an average
                 speedup of 4.1 $ \times $ and 2.7 $ \times $ for packed
                 fixed and floating point data, respectively, compared
                 to scalarized code for ARM architecture and an average
                 speedup of 3.1 $ \times $ and 1.5 $ \times $ for packed
                 fixed and floating point data, respectively, for x86
                 architecture. Implementing data parallel instructions
                 directly in the assembly code would have required a lot
                 of design effort, and it would not have been sustainable
                 across evolving platform variants. Thus, the compiler
                 can be employed to efficiently speed up critical
                 sections of the target application. The compiler is
                 therefore potentially employable to raise the design
                 abstraction and reduce development time for both
                 embedded and general-purpose applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "50",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Susu:2020:VLA,
  author =       "Alexandru E. Susu",
  title =        "A Vector-Length Agnostic Compiler for the {Connex-S}
                 Accelerator with Scratchpad Memory",
  journal =      j-TECS,
  volume =       "19",
  number =       "6",
  pages =        "51:1--51:30",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3406536",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:18 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3406536",
  abstract =     "Compiling sequential C programs for Connex-S, a
                 competitive, scalable and customizable, wide vector
                 accelerator for intensive embedded applications with 32
                 to 4,096 16-bit integer lanes and a limited capacity
                 local scratchpad memory, is challenging. Our \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "51",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2021:D,
  author =       "Edward A. Lee",
  title =        "Determinism",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "38:1--38:34",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3453652",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3453652",
  abstract =     "This article is about deterministic models, what they
                 are, why they are useful, and what their limitations
                 are. First, the article emphasizes that determinism is
                 a property of models, not of physical systems. Whether
                 a model is deterministic or not \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Leon:2021:IPD,
  author =       "Vasileios Leon and Theodora Paparouni and Evangelos
                 Petrongonas and Dimitrios Soudris and Kiamal
                 Pekmestzi",
  title =        "Improving Power of {DSP} and {CNN} Hardware
                 Accelerators Using Approximate Floating-point
                 Multipliers",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "39:1--39:21",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3448980",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3448980",
  abstract =     "Approximate computing has emerged as a promising
                 design alternative for delivering power-efficient
                 systems and circuits by exploiting the inherent error
                 resiliency of numerous applications. The current
                 article aims to tackle the increased hardware cost of
                 floating-point multiplication units, which prohibits
                 their usage in embedded computing. We introduce AFMU
                 (Approximate Floating-point MUltiplier), an
                 area/power-efficient family of multipliers, which apply
                 two approximation techniques in the resource-hungry
                 mantissa multiplication and can be seamlessly extended
                 to support dynamic configuration of the approximation
                 levels via gating signals. AFMU offers large accuracy
                 configuration margins, provides negligible logic
                 overhead for dynamic configuration, and detects
                 unexpected results that may arise due to the
                 approximations. Our evaluation shows that AFMU delivers
                 energy gains in the range 3.6\%--53.5\% for
                 half-precision and 37.2\%--82.4\% for single-precision,
                 in exchange for mean relative error around
                 0.05\%--3.33\% and 0.01\%--2.20\%, respectively. In
                 comparison with state-of-the-art multipliers, AFMU
                 exhibits up to 4--6 $ \times $ smaller error on average
                 while delivering more energy-efficient computing. The
                 evaluation in image processing shows that AFMU provides
                 sufficient quality of service, i.e., more than 50db
                 PSNR and near 1 SSIM values, and up to 57.4\% power
                 reduction. When used in floating-point CNNs, the
                 accuracy loss is small (or zero), i.e., up to 5.4\% for
                 MNIST and CIFAR-10, in exchange for up to 63.8\% power
                 gain.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Garcia:2021:IHG,
  author =       "Andr{\'e}s Amaya Garc{\'\i}a and David May and Ed
                 Nutting",
  title =        "Integrated Hardware Garbage Collection",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "40:1--40:25",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3450147",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/csharp.bib;
                 https://www.math.utah.edu/pub/tex/bib/python.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3450147",
  abstract =     "Garbage collected programming languages, such as
                 Python and C\#, have accelerated software development.
                 These modern languages increase productivity and
                 software reliability as they provide high-level data
                 representation and control structures. Modern languages
                 are widely used in software development for mobile,
                 desktop, and server devices, but their adoption is
                 limited in real-time embedded systems.\par

                 There is clear interest in supporting modern languages
                 in embedded devices as emerging markets, like the
                 Internet of Things, demand ever smarter and more
                 reliable products. Multiple commercial and open-source
                 projects, such as Zerynth and MicroPython, are
                 attempting to provide support. But these projects rely
                 on software garbage collectors that impose high
                 overheads and introduce unpredictable pauses,
                 preventing their use in many embedded applications.
                 These limitations arise from the unsuitability of
                 conventional processors for performing efficient,
                 predictable garbage collection.\par

                 We propose the Integrated Hardware Garbage Collector
                 (IHGC); a garbage collector tightly coupled with the
                 processor that runs continuously in the background.
                 Further, we introduce a static analysis technique to
                 guarantee that real-time programs are never paused by
                 the collector. Our design allocates a memory cycle to
                 the collector when the processor is not using the
                 memory. The IHGC achieves this by careful division of
                 collection work into single-memory-access steps that
                 are interleaved with the processor's memory accesses.
                 As a result, our collector eliminates run-time
                 overheads and enables real-time program
                 analysis.\par

                 The principles behind the IHGC can be used in
                 conjunction with existing architectures. For example,
                 we simulated the IHGC alongside the ARMv6-M
                 architecture. Compared to a conventional processor, our
                 experiments indicate that the IHGC offers 1.5--7 times
                 better performance for programs that rely on garbage
                 collection. The IHGC delivers the benefits of
                 garbage-collected languages with real-time performance
                 but without the complexity and overheads inherent in
                 software collectors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhou:2021:RAS,
  author =       "Yuanbin Zhou and Soheil Samii and Petru Eles and Zebo
                 Peng",
  title =        "Reliability-aware Scheduling and Routing for Messages
                 in Time-sensitive Networking",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "41:1--41:24",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3458768",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3458768",
  abstract =     "Time-sensitive Networking (TSN) on Ethernet is a
                 promising communication technology in the automotive
                 and industrial automation industries due to its
                 real-time and high-bandwidth communication
                 capabilities. Time-triggered scheduling and static
                 routing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Valente:2021:CMS,
  author =       "Giacomo Valente and Tiziana Fanni and Carlo Sau and
                 Tania {Di Mascio} and Luigi Pomante and Francesca
                 Palumbo",
  title =        "A Composable Monitoring System for Heterogeneous
                 Embedded Platforms",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "42:1--42:34",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3461647",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3461647",
  abstract =     "Advanced computations on embedded devices are nowadays
                 a must in any application field. Often, to cope with
                 such a need, embedded systems designers leverage on
                 complex heterogeneous reconfigurable platforms that
                 offer high performance, thanks to the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Akdur:2021:SGI,
  author =       "Deniz Akdur",
  title =        "Skills Gaps in the Industry: Opinions of Embedded
                 Software Practitioners",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "43:1--43:39",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3463340",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3463340",
  abstract =     "Many practitioners in the software-intensive embedded
                 industry often face difficulties after beginning their
                 careers due to misalignment of the skills learned at
                 the university with what is required in the workplace.
                 Companies spend crucial resources to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Aligholipour:2021:TTA,
  author =       "Rashid Aligholipour and Mohammad Baharloo and Behnam
                 Farzaneh and Meisam Abdollahi and Ahmad Khonsari",
  title =        "{TAMA}: Turn-aware Mapping and Architecture --- a
                 Power-efficient Network-on-Chip Approach",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "44:1--44:24",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3462700",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3462700",
  abstract =     "Nowadays, static power consumption in chip
                 multiprocessor (CMP) is the most crucial concern of
                 chip designers. Power-gating is an effective approach
                 to mitigate static power consumption particularly in
                 low utilization. Network-on-Chip (NoC) as the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Roy:2021:SQL,
  author =       "Sanjit Kumar Roy and Rajesh Devaraj and Arnab Sarkar
                 and Debabrata Senapati",
  title =        "{SLAQA}: Quality-level Aware Scheduling of Task Graphs
                 on Heterogeneous Distributed Systems",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "45:1--45:31",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3462776",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3462776",
  abstract =     "Continuous demands for higher performance and
                 reliability within stringent resource budgets are
                 driving a shift from homogeneous to heterogeneous
                 processing platforms for the implementation of today's
                 cyber-physical systems (CPSs). These CPSs are
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Modekurthy:2021:DRT,
  author =       "Venkata P. Modekurthy and Abusayeed Saifullah and
                 Sanjay Madria",
  title =        "A Distributed Real-time Scheduling System for
                 Industrial Wireless Networks",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "46:1--46:28",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3464429",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3464429",
  abstract =     "The concept of Industry 4.0 introduces the unification
                 of industrial Internet-of-Things (IoT), cyber physical
                 systems, and data-driven business modeling to improve
                 production efficiency of the factories. To ensure high
                 production efficiency, Industry \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Forsberg:2021:PEM,
  author =       "Bj{\"o}rn Forsberg and Marco Solieri and Marko
                 Bertogna and Luca Benini and Andrea Marongiu",
  title =        "The Predictable Execution Model in Practice: Compiling
                 Real Applications for {COTS} Hardware",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "47:1--47:25",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3465370",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3465370",
  abstract =     "Adoption of multi- and many-core processors in
                 real-time systems has so far been slowed down, if not
                 totally barred, due to the difficulty in providing
                 analytical real-time guarantees on worst-case execution
                 times. The Predictable Execution Model (PREM)
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Maity:2021:SSO,
  author =       "Biswadip Maity and Bryan Donyanavard and Anmol
                 Surhonne and Amir Rahmani and Andreas Herkersdorf and
                 Nikil Dutt",
  title =        "{SEAMS}: Self-Optimizing Runtime Manager for
                 Approximate Memory Hierarchies",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "48:1--48:26",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3466875",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3466875",
  abstract =     "Memory approximation techniques are commonly limited
                 in scope, targeting individual levels of the memory
                 hierarchy. Existing approximation techniques for a full
                 memory hierarchy determine optimal configurations at
                 design-time provided a goal and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Witterauf:2021:SLC,
  author =       "Michael Witterauf and Dominik Walter and Frank Hannig
                 and J{\"u}rgen Teich",
  title =        "Symbolic Loop Compilation for Tightly Coupled
                 Processor Arrays",
  journal =      j-TECS,
  volume =       "20",
  number =       "5",
  pages =        "49:1--49:31",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3466897",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 10 13:35:00 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3466897",
  abstract =     "Tightly Coupled Processor Arrays (TCPAs), a class of
                 massively parallel loop accelerators, allow
                 applications to offload computationally expensive loops
                 for improved performance and energy efficiency. To
                 achieve these two goals, executing a loop on a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bur:2021:WCE,
  author =       "M{\'a}rton B{\'u}r and Krist{\'o}f Marussy and Brett
                 H. Meyer and D{\'a}niel Varr{\'o}",
  title =        "Worst-case Execution Time Calculation for Query-based
                 Monitors by Witness Generation",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "107:1--107:36",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3471904",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3471904",
  abstract =     "Runtime monitoring plays a key role in the assurance
                 of modern intelligent cyber-physical systems, which are
                 frequently data-intensive and safety-critical. While
                 graph queries can serve as an expressive yet formally
                 precise specification language to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "107",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Park:2021:IML,
  author =       "Jurn-Gyu Park and Nikil Dutt and Sung-Soo Lim",
  title =        "An Interpretable Machine Learning Model Enhanced
                 Integrated {CPU--GPU DVFS} Governor",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "108:1--108:28",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3470974",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3470974",
  abstract =     "Modern heterogeneous CPU-GPU-based mobile
                 architectures, which execute intensive mobile
                 gaming/graphics applications, use software governors to
                 achieve high performance with energy-efficiency.
                 However, existing governors typically utilize simple
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "108",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ray:2021:HAS,
  author =       "Kaustabha Ray and Ansuman Banerjee",
  title =        "Horizontal Auto-Scaling for Multi-Access Edge
                 Computing Using Safe Reinforcement Learning",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "109:1--109:33",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3475991",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3475991",
  abstract =     "Multi-Access Edge Computing (MEC) has emerged as a
                 promising new paradigm allowing low latency access to
                 services deployed on edge servers to avert network
                 latencies often encountered in accessing cloud
                 services. A key component of the MEC environment is
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "109",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Aydin:2021:HSC,
  author =       "Furkan Aydin and Aydin Aysu and Mohit Tiwari and
                 Andreas Gerstlauer and Michael Orshansky",
  title =        "Horizontal Side-Channel Vulnerabilities of
                 Post-Quantum Key Exchange and Encapsulation Protocols",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "110:1--110:22",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476799",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476799",
  abstract =     "Key exchange protocols and key encapsulation
                 mechanisms establish secret keys to communicate digital
                 information confidentially over public channels.
                 Lattice-based cryptography variants of these protocols
                 are promising alternatives given their
                 quantum-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "110",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Giraldo:2021:HAE,
  author =       "J. S. P. Giraldo and Marian Verhelst",
  title =        "Hardware Acceleration for Embedded Keyword Spotting:
                 Tutorial and Survey",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "111:1--111:25",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3474365",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3474365",
  abstract =     "In recent years, Keyword Spotting (KWS) has become a
                 crucial human-machine interface for mobile devices,
                 allowing users to interact more naturally with their
                 gadgets by leveraging their own voice. Due to privacy,
                 latency and energy requirements, the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{RibeiroDaSilva:2021:MCH,
  author =       "Junio Cezar {Ribeiro Da Silva} and Lorena Le{\~a}o and
                 Vinicius Petrucci and Abdoulaye Gamati{\'e} and
                 Fernando Magno {Quint{\~a}o Pereira}",
  title =        "Mapping Computations in Heterogeneous Multicore
                 Systems with Statistical Regression on Program Inputs",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "112:1--112:35",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3478288",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3478288",
  abstract =     "A hardware configuration is a set of processors and
                 their frequency levels in a multicore heterogeneous
                 system. This article presents a compiler-based
                 technique to match functions with hardware
                 configurations. Such a technique consists of using
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "112",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2021:VSH,
  author =       "Yu Wang and Nima Roohi and Matthew West and Mahesh
                 Viswanathan and Geir E. Dullerud",
  title =        "Verifying Stochastic Hybrid Systems with Temporal
                 Logic Specifications via Model Reduction",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "113:1--113:27",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3483380",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3483380",
  abstract =     "We present a scalable methodology to verify stochastic
                 hybrid systems for inequality linear temporal logic
                 (iLTL) or inequality metric interval temporal logic
                 (iMITL). Using the Mori--Zwanzig reduction method, we
                 construct a finite-state Markov chain \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "113",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Servais:2021:ACR,
  author =       "Jason Servais and Ehsan Atoofian",
  title =        "Adaptive Computation Reuse for Energy-Efficient
                 Training of Deep Neural Networks",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "114:1--114:24",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487025",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487025",
  abstract =     "In recent years, Deep Neural Networks (DNNs) have been
                 deployed into a diverse set of applications from voice
                 recognition to scene generation mostly due to their
                 high-accuracy. DNNs are known to be computationally
                 intensive applications, requiring a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "114",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Saini:2021:IFC,
  author =       "Kanika Saini and Sheetal Kalra and Sandeep K. Sood",
  title =        "{IoT}-Fog-Cloud Centric Earthquake Monitoring and
                 Prediction",
  journal =      j-TECS,
  volume =       "20",
  number =       "6",
  pages =        "115:1--115:26",
  month =        nov,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487942",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Dec 10 11:17:19 MST 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487942",
  abstract =     "Earthquakes are among the most inevitable natural
                 catastrophes. The uncertainty about the severity of the
                 earthquake has a profound effect on the burden of
                 disaster and causes massive economic and societal
                 losses. Although unpredictable, it can be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "115",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2022:ISIa,
  author =       "Yuan-Hao Chang and Jalil Boukhobza and Song Han",
  title =        "Introduction to the Special Issue on Memory and
                 Storage Systems for Embedded and {IoT} Applications",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "1:1--1:4",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3505283",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3505283",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Manohar:2022:CUC,
  author =       "Sheel Sindhu Manohar and Sparsh Mittal and Hemangee K.
                 Kapoor",
  title =        "{CORIDOR}: Using {COherence} and {TempoRal LocalIty}
                 to Mitigate Read Disurbance {ErrOR} in {STT--RAM}
                 Caches",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "2:1--2:24",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3484493",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3484493",
  abstract =     "In the deep sub-micron region, ``spin-transfer torque
                 RAM'' (STT-RAM) suffers from ``read-disturbance error''
                 (RDE), whereby a read operation disturbs the stored
                 data. Mitigation of RDE requires restore operations,
                 which imposes latency and energy penalties. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Marinelli:2022:MES,
  author =       "Tommaso Marinelli and Jos{\'e} Ignacio G{\'o}mez
                 P{\'e}rez and Christian Tenllado and Manu Komalan and
                 Mohit Gupta and Francky Catthoor",
  title =        "Microarchitectural Exploration of {STT--MRAM}
                 Last-level Cache Parameters for Energy-efficient
                 Devices",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "3:1--3:20",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3490391",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3490391",
  abstract =     "As the technology scaling advances, limitations of
                 traditional memories in terms of density and energy
                 become more evident. Modern caches occupy a large part
                 of a CPU physical size and high static leakage poses a
                 limit to the overall efficiency of the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wittig:2022:AES,
  author =       "Robert Wittig and Philipp Schulz and Emil Matus and
                 Gerhard P. Fettweis",
  title =        "Accurate Estimation of Service Rates in Interleaved
                 Scratchpad Memory Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "4:1--4:15",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3457171",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3457171",
  abstract =     "The prototyping of embedded platforms demands rapid
                 exploration of multi-dimensional parameter sets.
                 Especially the design of the memory system is essential
                 to guarantee high utilization while reducing conflicts
                 at the same time. To aid the design process, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hakert:2022:SMR,
  author =       "Christian Hakert and Kuan-Hsun Chen and Horst
                 Schirmeier and Lars Bauer and Paul R. Genssler and
                 Georg von der Br{\"u}ggen and Hussam Amrouch and
                 J{\"o}rg Henkel and Jian-Jia Chen",
  title =        "Software-Managed Read and Write Wear-Leveling for
                 Non-Volatile Main Memory",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "5:1--5:24",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3483839",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3483839",
  abstract =     "In-memory wear-leveling has become an important
                 research field for emerging non-volatile main memories
                 over the past years. Many approaches in the literature
                 perform wear-leveling by making use of special
                 hardware. Since most non-volatile memories only
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Asifuzzaman:2022:PPE,
  author =       "Kazi Asifuzzaman and Rommel S{\'a}nchez Verdejo and
                 Petar Radojkovi{\'c}",
  title =        "Performance and Power Estimation of {STT--MRAM} Main
                 Memory with Reliable System-level Simulation",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "6:1--6:25",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476838",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476838",
  abstract =     "It is questionable whether DRAM will continue to scale
                 and will meet the needs of next-generation systems.
                 Therefore, significant effort is invested in research
                 and development of novel memory technologies. One of
                 the candidates for next-generation memory \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shin:2022:EED,
  author =       "Dongsuk Shin and Hakbeom Jang and Kiseok Oh and Jae W.
                 Lee",
  title =        "An Energy-Efficient {DRAM} Cache Architecture for
                 Mobile Platforms With {PCM}-Based Main Memory",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "7:1--7:22",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3451995",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3451995",
  abstract =     "A long battery life is a first-class design objective
                 for mobile devices, and main memory accounts for a
                 major portion of total energy consumption. Moreover,
                 the energy consumption from memory is expected to
                 increase further with ever-growing demands for
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wen:2022:SHD,
  author =       "Fei Wen and Mian Qin and Paul Gratz and Narasimha
                 Reddy",
  title =        "Software Hint-Driven Data Management for Hybrid Memory
                 in Mobile Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "8:1--8:18",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3494536",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3494536",
  abstract =     "Hybrid memory systems, comprised of emerging
                 non-volatile memory (NVM) and DRAM, have been proposed
                 to address the growing memory demand of current mobile
                 applications. Recently emerging NVM technologies, such
                 as phase-change memories (PCM), memristor, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zou:2022:DHA,
  author =       "Yu Zou and Amro Awad and Mingjie Lin",
  title =        "{DirectNVM}: Hardware-accelerated {NVMe SSDs} for
                 High-performance Embedded Computing",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "9:1--9:24",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3463911",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3463911",
  abstract =     "With data-intensive artificial intelligence (AI) and
                 machine learning (ML) applications rapidly surging,
                 modern high-performance embedded systems, with
                 heterogeneous computing resources, critically demand
                 low-latency and high-bandwidth data communication.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Missimer:2022:TRT,
  author =       "Katherine Missimer and Manos Athanassoulis and Richard
                 West",
  title =        "{Telomere}: Real-Time {NAND} Flash Storage",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "10:1--10:24",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3479157",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3479157",
  abstract =     "Modern solid-state disks achieve high data transfer
                 rates due to their massive internal parallelism.
                 However, out-of-place updates for flash memory incur
                 garbage collection costs when valid data needs to be
                 copied during space reclamation. The root cause of this
                 extra cost is that solid-state disks are not always
                 able to accurately determine data lifetime and group
                 together data that expires before the space needs to be
                 reclaimed. Real-time systems found in autonomous
                 vehicles, industrial control systems, and assembly-line
                 robots store data from hundreds of sensors and often
                 have predictable data lifetimes. These systems require
                 guaranteed high storage bandwidth for read and write
                 operations by mission-critical real-time tasks. In this
                 article, we depart from the traditional block device
                 interface to guarantee the high throughput needed to
                 process large volumes of data. Using data lifetime
                 information from the application layer, our proposed
                 real-time design, called Telomere, is able to
                 intelligently lay out data in NAND flash memory and
                 eliminate valid page copies during garbage collection.
                 Telomere's real-time admission control is able to
                 guarantee tasks their required read and write
                 operations within their periods. Under randomly
                 generated tasksets containing 500 tasks, Telomere
                 achieves 30\% higher throughput with a 5\% storage cost
                 compared to pre-existing techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zou:2022:APS,
  author =       "Yu Zou and Kazi Abu Zubair and Mazen Alwadi and Rakin
                 Muhammad Shadab and Sanjay Gandham and Amro Awad and
                 Mingjie Lin",
  title =        "{ARES}: Persistently Secure Non-Volatile Memory with
                 Processor-transparent and Hardware-friendly Integrity
                 Verification and Metadata Recovery",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "11:1--11:32",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3492735",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3492735",
  abstract =     "Emerging byte-addressable Non-Volatile Memory (NVM)
                 technology, although promising superior memory density
                 and ultra-low energy consumption, poses unique
                 challenges to achieving persistent data privacy and
                 computing security, both of which are critically
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Eldstaal-Ahrens:2022:CCL,
  author =       "Albin Eldst{\aa}l-Ahrens and Angelos Arelakis and
                 Ioannis Sourdis",
  title =        "{L$^2$C}: Combining Lossy and Lossless Compression on
                 Memory and {I/O}",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "12:1--12:27",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3481641",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3481641",
  abstract =     "In this article, we introduce L$^2$C, a hybrid
                 lossy/lossless compression scheme applicable both to
                 the memory subsystem and I/O traffic of a processor
                 chip. L$^2$C employs general-purpose lossless
                 compression and combines it with state-of-the-art lossy
                 compression to achieve compression ratios up to 16:1
                 and to improve the utilization of chip's bandwidth
                 resources. Compressing memory traffic yields lower
                 memory access time, improving system performance, and
                 energy efficiency. Compressing I/O traffic offers
                 several benefits for resource-constrained systems,
                 including more efficient storage and networking. We
                 evaluate L$^2$C as a memory compressor in simulation
                 with a set of approximation-tolerant applications.
                 L$^2$C improves baseline execution time by an average
                 of 50\% and total system energy consumption by 16\%.
                 Compared to the lossy and lossless current
                 state-of-the-art memory compression approaches, L$^2$C
                 improves execution time by 9\% and 26\%, respectively,
                 and reduces system energy costs by 3\% and 5\%,
                 respectively. I/O compression efficacy is evaluated
                 using a set of real-life datasets. L$^2$C achieves
                 compression ratios of up to 10.4:1 for a single dataset
                 and on average about 4:1, while introducing no more
                 than 0.4\% error.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nie:2022:HRA,
  author =       "Lanshun Nie and Chenghao Fan and Shuang Lin and Li
                 Zhang and Yajuan Li and Jing Li",
  title =        "Holistic Resource Allocation Under Federated
                 Scheduling for Parallel Real-time Tasks",
  journal =      j-TECS,
  volume =       "21",
  number =       "1",
  pages =        "13:1--13:29",
  month =        jan,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3489467",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Feb 16 14:00:33 MST 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3489467",
  abstract =     "With the technology trend of hardware and workload
                 consolidation for embedded systems and the rapid
                 development of edge computing, there has been
                 increasing interest in supporting parallel real-time
                 tasks to better utilize the multi-core platforms while
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Minakova:2022:SBR,
  author =       "Svetlana Minakova and Dolly Sapra and Todor Stefanov
                 and Andy D. Pimentel",
  title =        "Scenario Based Run-Time Switching for Adaptive
                 {CNN}-Based Applications at the Edge",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "14:1--14:33",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3488718",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3488718",
  abstract =     "Convolutional Neural Networks (CNNs) are biologically
                 inspired computational models that are at the heart of
                 many modern computer vision and natural language
                 processing applications. Some of the CNN-based
                 applications are executed on mobile and embedded
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2022:PRA,
  author =       "Xing Chen and Umit Ogras and Chaitali Chakrabarti",
  title =        "Probabilistic Risk-Aware Scheduling with Deadline
                 Constraint for Heterogeneous {SoCs}",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "15:1--15:27",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3489409",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3489409",
  abstract =     "Hardware Trojans can compromise System-on-Chip (SoC)
                 performance. Protection schemes implemented to combat
                 these threats cannot guarantee 100\% detection rate and
                 may also introduce performance overhead. This paper
                 defines the risk of running a job on an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Dong:2022:EEA,
  author =       "Jiankuo Dong and Fangyu Zheng and Jingqiang Lin and
                 Zhe Liu and Fu Xiao and Guang Fan",
  title =        "{EC-ECC}: Accelerating Elliptic Curve Cryptography for
                 Edge Computing on Embedded {GPU TX2}",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "16:1--16:25",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3492734",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3492734",
  abstract =     "Driven by artificial intelligence and computer vision
                 industries, Graphics Processing Units (GPUs) are now
                 rapidly achieving extraordinary computing power. In
                 particular, the NVIDIA Tegra K1/X1/X2 embedded GPU
                 platforms, which are also treated as edge \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Biswas:2022:PNC,
  author =       "Arnab Kumar Biswas and Biplab Sikdar",
  title =        "Protecting Network-on-Chip Intellectual Property Using
                 Timing Channel Fingerprinting",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "17:1--17:21",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3495565",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495565",
  abstract =     "The theft of Intellectual property (IP) is a serious
                 security threat for all businesses that are involved in
                 the creation of IP. In this article, we consider such
                 attacks against IP for Network-on-Chip (NoC) that are
                 commonly used as a popular on-chip \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liao:2022:RRS,
  author =       "Jianwei Liao and Jun Li and Mingwang Zhao and Zhibing
                 Sha and Zhigang Cai",
  title =        "Read Refresh Scheduling and Data Reallocation against
                 Read Disturb in {SSDs}",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "18:1--18:27",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3495254",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3495254",
  abstract =     "Read disturb is a circuit-level noise in flash-based
                 Solid-State Drives (SSDs), induced by intensive read
                 requests, which may result in unexpected read errors.
                 The approach of read refresh (RR) is commonly adopted
                 to mitigate its negative effects by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hong:2022:EGE,
  author =       "Ziyang Hong and C. Patrick Yue",
  title =        "Efficient-Grad: Efficient Training Deep Convolutional
                 Neural Networks on Edge Devices with Gradient
                 Optimizations",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "19:1--19:24",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3504034",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3504034",
  abstract =     "With the prospering of mobile devices, the distributed
                 learning approach, enabling model training with
                 decentralized data, has attracted great interest from
                 researchers. However, the lack of training capability
                 for edge devices significantly limits the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Zhao:2022:MSM,
  author =       "Qingling Zhao and Mengfei Qu and Zonghua Gu and Haibo
                 Zeng",
  title =        "Minimizing Stack Memory for Partitioned
                 Mixed-criticality Scheduling on Multiprocessor
                 Platforms",
  journal =      j-TECS,
  volume =       "21",
  number =       "2",
  pages =        "20:1--20:30",
  month =        mar,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3506703",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 24 15:59:57 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3506703",
  abstract =     "A Mixed-Criticality System (MCS) features the
                 integration of multiple subsystems that are subject to
                 different levels of safety certification on a shared
                 hardware platform. In cost-sensitive application
                 domains such as automotive E/E systems, it is
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2022:ISIb,
  author =       "Yuan-Hao Chang and Jalil Boukhobza and Song Han",
  title =        "Introduction to the Special Issue on Memory and
                 Storage Systems for Embedded and {IoT} Applications:
                 {Part 2}",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "21:1--21:2",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3531707",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3531707",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gupta:2022:SLC,
  author =       "Saransh Gupta and Behnam Khaleghi and Sahand Salamat
                 and Justin Morris and Ranganathan Ramkumar and Jeffrey
                 Yu and Aniket Tiwari and Jaeyoung Kang and Mohsen Imani
                 and Baris Aksanli and Tajana Simuni{\'c} Rosing",
  title =        "Store-n-Learn: Classification and Clustering with
                 Hyperdimensional Computing across Flash Hierarchy",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "22:1--22:25",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3503541",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3503541",
  abstract =     "Processing large amounts of data, especially in
                 learning algorithms, poses a challenge for current
                 embedded computing systems. Hyperdimensional (HD)
                 computing (HDC) is a brain-inspired computing paradigm
                 that works with high-dimensional vectors called
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rrushi:2022:PDP,
  author =       "Julian L. Rrushi",
  title =        "Physics-Driven Page Fault Handling for Customized
                 Deception against {CPS} Malware",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "23:1--23:36",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502742",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502742",
  abstract =     "Malware crafted to attack cyber-physical systems such
                 as the electrical power grid have a physics-centric
                 nucleus. Cyber-physical systems malware understand
                 physics and hence use their knowledge to guide how they
                 initiate physical damage on a compromised \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lin:2022:DRR,
  author =       "Wei-Ting Lin and Hsiang-Yun Cheng and Chia-Lin Yang
                 and Meng-Yao Lin and Kai Lien and Han-Wen Hu and
                 Hung-Sheng Chang and Hsiang-Pang Li and Meng-Fan Chang
                 and Yen-Ting Tsou and Chin-Fu Nien",
  title =        "{DL-RSIM}: a Reliability and Deployment Strategy
                 Simulation Framework for {ReRAM}-based {CNN}
                 Accelerators",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "24:1--24:29",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3507639",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3507639",
  abstract =     "Memristor-based deep learning accelerators provide a
                 promising solution to improve the energy efficiency of
                 neuromorphic computing systems. However, the electrical
                 properties and crossbar structure of memristors make
                 these accelerators error-prone. In \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wei:2022:SBD,
  author =       "Qian Wei and Bingzhe Li and Wanli Chang and Zhiping
                 Jia and Zhaoyan Shen and Zili Shao",
  title =        "A Survey of Blockchain Data Management Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "25:1--25:28",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502741",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502741",
  abstract =     "Blockchain has been widely deployed in various fields,
                 such as finance, education, and public services.
                 Blockchain has decentralized mechanisms with
                 persistency and auditability and runs as an immutable
                 distributed ledger, where transactions are jointly
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bai:2022:FCW,
  author =       "Zhenyu Bai and Hugues Cass{\'e} and Marianne {De
                 Michiel} and Thomas Carle and Christine Rochange",
  title =        "A Framework for Calculating {WCET} Based on Execution
                 Decision Diagrams",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "26:1--26:26",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3476879",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3476879",
  abstract =     "Due to the dynamic behaviour of acceleration
                 mechanisms such as caches and branch predictors, static
                 Worst-case Execution Time (WCET) analysis methods tend
                 to scale poorly to modern hardware architectures. As a
                 result, a trade-off must be found between \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Song:2022:DDB,
  author =       "Shihao Song and Harry Chong and Adarsha Balaji and
                 Anup Das and James Shackleford and Nagarajan
                 Kandasamy",
  title =        "{DFSynthesizer}: Dataflow-based Synthesis of Spiking
                 Neural Networks to Neuromorphic Hardware",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "27:1--27:35",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3479156",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3479156",
  abstract =     "Spiking Neural Networks (SNNs) are an emerging
                 computation model that uses event-driven activation and
                 bio-inspired learning algorithms. SNN-based machine
                 learning programs are typically executed on tile-based
                 neuromorphic hardware platforms, where each \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Xiao:2022:CIA,
  author =       "Jun Xiao and Yixian Shen and Andy D. Pimentel",
  title =        "Cache Interference-aware Task Partitioning for
                 Non-preemptive Real-time Multi-core Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "28:1--28:28",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3487581",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3487581",
  abstract =     "Shared caches in multi-core processors introduce
                 serious difficulties in providing guarantees on the
                 real-time properties of embedded software due to the
                 interaction and the resulting contention in the shared
                 caches. Prior work has studied the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ullah:2022:ADA,
  author =       "Salim Ullah and Siva Satyendra Sahoo and Nemath Ahmed
                 and Debabrata Chaudhury and Akash Kumar",
  title =        "{AppAxO}: Designing Application-specific Approximate
                 Operators for {FPGA}-based Embedded Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "29:1--29:31",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3513262",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3513262",
  abstract =     "Approximate arithmetic operators, such as adders and
                 multipliers, are increasingly used to satisfy the
                 energy and performance requirements of
                 resource-constrained embedded systems. However, most of
                 the available approximate operators have an
                 application-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lin:2022:HEI,
  author =       "Yi-Syuan Lin and Yu-Pei Liang and Tseng-Yi Chen and
                 Yuan-Hao Chang and Shuo-Han Chen and Hsin-Wen Wei and
                 Wei-Kuan Shih",
  title =        "How to Enable Index Scheme for Reducing the Writing
                 Cost of {DNA} Storage on Insertion and Deletion",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "30:1--30:25",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3516482",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3516482",
  abstract =     "Recently, the requirement of storing digital data has
                 been growing rapidly; however, the conventional storage
                 medium cannot satisfy these huge demands. Fortunately,
                 thanks to biological technology development, storing
                 digital data into deoxyribonucleic \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Raj:2022:RMV,
  author =       "Pani Prithvi Raj and Pakala Akhil Reddy and Nitin
                 Chandrachoodan",
  title =        "Reduced Memory {Viterbi} Decoding for
                 Hardware-accelerated Speech Recognition",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "31:1--31:18",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3510028",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3510028",
  abstract =     "Large Vocabulary Continuous Speech Recognition systems
                 require Viterbi searching through a large state space
                 to find the most probable sequence of phonemes that led
                 to a given sound sample. This needs storing and
                 updating of a large Active State List (ASL). \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Desai:2022:CLR,
  author =       "Harsh Desai and Matteo Nardello and Davide Brunelli
                 and Brandon Lucia",
  title =        "{Camaroptera}: a Long-range Image Sensor with Local
                 Inference for Remote Sensing Applications",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "32:1--32:25",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3510850",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3510850",
  abstract =     "Batteryless image sensors present an opportunity for
                 long-life, long-range sensor deployments that require
                 zero maintenance, and have low cost. Such deployments
                 are critical for enabling remote sensing applications,
                 e.g., instrumenting national highways, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mao:2022:TEA,
  author =       "Jiachen Mao and Qing Yang and Ang Li and Kent W. Nixon
                 and Hai Li and Yiran Chen",
  title =        "Toward Efficient and Adaptive Design of Video
                 Detection System with Deep Neural Networks",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "33:1--33:21",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3484946",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3484946",
  abstract =     "In the past decade, Deep Neural Networks (DNNs), e.g.,
                 Convolutional Neural Networks, achieved human-level
                 performance in vision tasks such as object
                 classification and detection. However, DNNs are known
                 to be computationally expensive and thus hard to be
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2022:SRT,
  author =       "Cong Chen and Zhong Hong and Jian-Min Jiang",
  title =        "Scheduling in Real-Time Mobile Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "3",
  pages =        "34:1--34:36",
  month =        may,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3517747",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jul 20 06:57:46 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3517747",
  abstract =     "To guarantee the safety and security of a real-time
                 mobile system such as an intelligent transportation
                 system, it is necessary to model and analyze its
                 behaviors prior to actual development. In particular,
                 the mobile objects in such systems must be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Trajkovic:2022:PMA,
  author =       "Jelena Trajkovic and Sara Karimi and Samantha Hangsan
                 and Wenlu Zhang",
  title =        "Prediction Modeling for Application-Specific
                 Communication Architecture Design of Optical {NoC}",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "35:1--35:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3520241",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3520241",
  abstract =     "Multi-core systems-on-chip are becoming
                 state-of-the-art. Therefore, there is a need for a fast
                 and energy-efficient interconnect to take full
                 advantage of the computational capabilities.
                 Integration of silicon photonics with a traditional
                 electrical \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Krishnan:2022:BCS,
  author =       "Archanaa S. Krishnan and Patrick Schaumont",
  title =        "Benchmarking and Configuring Security Levels in
                 Intermittent Computing",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "36:1--36:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522748",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522748",
  abstract =     "Intermittent computing derives its name from the
                 intermittent character of the power source used to
                 drive the computing, typically an energy harvester of
                 ambient energy sources. Intermittent computing is
                 characterized by frequent transitions between the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2022:HFY,
  author =       "Shihua Huang and Luc Waeijen and Henk Corporaal",
  title =        "How Flexible is Your Computing System?",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "37:1--37:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3524861",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3524861",
  abstract =     "In literature, computer architectures are frequently
                 claimed to be highly flexible, typically implying the
                 existence of trade-offs between flexibility and
                 performance or energy efficiency. Processor
                 flexibility, however, is not very sharply defined, and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Isuwa:2022:QMQ,
  author =       "Samuel Isuwa and Somdip Dey and Andre P. Ortega and
                 Amit Kumar Singh and Bashir M. Al-Hashimi and Geoff V.
                 Merrett",
  title =        "{QUAREM}: Maximising {QoE} Through Adaptive Resource
                 Management in Mobile {MPSoC} Platforms",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "38:1--38:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3526116",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3526116",
  abstract =     "Heterogeneous multi-processor system-on-chip (MPSoC)
                 smartphones are required to offer increasing
                 performance and user quality-of-experience (QoE),
                 despite comparatively slow advances in battery
                 technology. Approaches to balance instantaneous power
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2022:ARF,
  author =       "Yanfeng Chen and Tianyu Zhang and Fanxin Kong and Lin
                 Zhang and Qingxu Deng",
  title =        "Attack-resilient Fusion of Sensor Data with Uncertain
                 Delays",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "39:1--39:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532181",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532181",
  abstract =     "Malicious attackers may disrupt the safety of
                 autonomous systems through compromising sensors to feed
                 wrong measurements to the controller. This article
                 proposes attack-resilient sensor fusion that combines
                 local sensor readings and shared sensing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{France-Pillois:2022:SAR,
  author =       "Maxime France-Pillois and Abdoulaye Gamati{\'e} and
                 Gilles Sassatelli",
  title =        "A Segmented Adaptive Router for Near
                 Energy-Proportional Networks-on-Chip",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "40:1--40:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3529106",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3529106",
  abstract =     "A Network-on-Chip (NoC) is an essential component of a
                 chip multiprocessor (CMP) which however contributes to
                 a large fraction of system energy. The unpredictability
                 of traffic across a NoC frequently involves an
                 expensive over-sizing of NoC resources \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mishra:2022:SCF,
  author =       "Tanmaya Mishra and Thidapat Chantem and Ryan Gerdes",
  title =        "Survey of Control-flow Integrity Techniques for
                 Real-time Embedded Systems",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "41:1--41:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3538275",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3538275",
  abstract =     "Computing systems, including real-time embedded
                 systems, are becoming increasingly connected to allow
                 for more advanced and safer operation. Such embedded
                 systems are also often resource-constrained, for
                 example, with lower processing capabilities \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2022:RID,
  author =       "Tse-Yuan Wang and Chun-Feng Wu and Che-Wei Tsao and
                 Yuan-Hao Chang and Tei-Wei Kuo and Xue Liu",
  title =        "Rethinking the Interactivity of {OS} and Device Layers
                 in Memory Management",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "42:1--42:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3530876",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3530876",
  abstract =     "In the big data era, a huge number of services has
                 placed a fast-growing demand on the capacity of
                 DRAM-based main memory. However, due to the high
                 hardware cost and serious leakage power/energy
                 consumption, the growth rate of DRAM capacity cannot
                 meet \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ouyang:2022:WWF,
  author =       {Xiangzhen Ouyang and Yian Zhu},
  title =        {\pkg{wfspan}: Wait-free Dynamic Memory Management},
  journal =      j-TECS,
  volume =       {21},
  number =       {4},
  pages =        {43:1--43:??},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3533724},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Oct 29 08:11:12 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3533724},
  abstract =     {Dynamic memory allocation plays a vital role in modern
                 application programs. Modern lock-free memory
                 allocators based on hardware atomic primitives usually
                 provide good performance. However, threads may starve
                 in these lock-free implementations, leading \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {43},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Choi:2022:ECA,
  author =       {Kyubaik Choi and Gerald E. Sobelman},
  title =        {An Efficient {CNN} Accelerator for Low-Cost Edge
                 Systems},
  journal =      j-TECS,
  volume =       {21},
  number =       {4},
  pages =        {44:1--44:??},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3539224},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Oct 29 08:11:12 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3539224},
  abstract =     {Customized hardware based convolutional neural network
                 (CNN or ConvNet) accelerators have attracted
                 significant attention for applications in a low-cost,
                 edge computing system. However, there is a lack of
                 research that seeks to optimize at both the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {44},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Zhao:2022:CBI,
  author =       "Qingling Zhao and Mingqiang Chen and Zonghua Gu and
                 Siyu Luan and Haibo Zeng and Samarjit Chakraborty",
  title =        "{CAN} Bus Intrusion Detection Based on Auxiliary
                 Classifier {GAN} and Out-of-distribution Detection",
  journal =      j-TECS,
  volume =       "21",
  number =       "4",
  pages =        "45:1--45:??",
  month =        jul,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3540198",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Oct 29 08:11:12 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3540198",
  abstract =     "The Controller Area Network (CAN) is a ubiquitous bus
                 protocol present in the Electrical/Electronic (E/E)
                 systems of almost all vehicles. It is vulnerable to a
                 range of attacks once the attacker gains access to the
                 bus through the vehicle's attack \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Daghero:2022:HAR,
  author =       {Francesco Daghero and Alessio Burrello and Chen Xie
                 and Marco Castellano and Luca Gandolfi and Andrea
                 Calimera and Enrico Macii and Massimo Poncino and
                 Daniele Jahier Pagliari},
  title =        {Human Activity Recognition on Microcontrollers with
                 Quantized and Adaptive Deep Neural Networks},
  journal =      j-TECS,
  volume =       {21},
  number =       {4},
  pages =        {46:1--46:??},
  month =        jul,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3542819},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Oct 29 08:11:12 MDT 2022},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3542819},
  abstract =     {Human Activity Recognition (HAR) based on inertial
                 data is an increasingly diffused task on embedded
                 devices, from smartphones to ultra low-power sensors.
                 Due to the high computational complexity of deep
                 learning models, most embedded HAR systems are
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {46},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Shafique:2022:ISIa,
  author =       {Muhammad Shafique and Theocharis Theocharides and Hai
                 Li and Chun Jason Xue},
  title =        {Introduction to the Special Issue on Accelerating {AI}
                 on the Edge --- {Part 1}},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {47:1--47:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3558078},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3558078},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {47},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Mendez:2022:EIC,
  author =       {Javier Mendez and Kay Bierzynski and M. P. Cu{\'e}llar
                 and Diego P. Morales},
  title =        {Edge Intelligence: Concepts, Architectures,
                 Applications, and Future Directions},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {48:1--48:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3486674},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3486674},
  abstract =     {The name edge intelligence, also known as Edge AI, is
                 a recent term used in the past few years to refer to
                 the confluence of machine learning, or broadly speaking
                 artificial intelligence, with edge computing. In this
                 article, we revise the concepts \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {48},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Kang:2022:MLM,
  author =       {Chih-Kai Kang and Hashan Roshantha Mendis and Chun-Han
                 Lin and Ming-Syan Chen and Pi-Cheng Hsiu},
  title =        {More Is Less: Model Augmentation for Intermittent Deep
                 Inference},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {49:1--49:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3506732},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3506732},
  abstract =     {Energy harvesting creates an emerging intermittent
                 computing paradigm but poses new challenges for
                 sophisticated applications such as intermittent deep
                 neural network (DNN) inference. Although model
                 compression has adapted DNNs to resource-constrained
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {49},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Zhu:2022:TUO,
  author =       {Shien Zhu and Luan H. K. Duong and Weichen Liu},
  title =        {{TAB}: Unified and Optimized Ternary, Binary, and
                 Mixed-precision Neural Network Inference on the Edge},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {50:1--50:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3508390},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3508390},
  abstract =     {Ternary Neural Networks (TNNs) and mixed-precision
                 Ternary Binary Networks (TBNs) have demonstrated higher
                 accuracy compared to Binary Neural Networks (BNNs)
                 while providing fast, low-power, and memory-efficient
                 inference. Related works have improved the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {50},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Jeong:2022:TBF,
  author =       {Eunjin Jeong and Jangryul Kim and Soonhoi Ha},
  title =        {{TensorRT}-Based Framework and Optimization
                 Methodology for Deep Learning Inference on {Jetson}
                 Boards},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {51:1--51:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3508391},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3508391},
  abstract =     {As deep learning inference applications are increasing
                 in embedded devices, an embedded device tends to equip
                 neural processing units (NPUs) in addition to a
                 multi-core CPU and a GPU. NVIDIA Jetson AGX Xavier is
                 an example. For fast and efficient \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {51},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Kundu:2022:TAA,
  author =       {Souvik Kundu and Yao Fu and Bill Ye and Peter A.
                 Beerel and Massoud Pedram},
  title =        {Toward Adversary-aware Non-iterative Model Pruning
                 through Dynamic Network Rewiring of {DNNs}},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {52:1--52:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3510833},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3510833},
  abstract =     {We present a dynamic network rewiring (DNR) method to
                 generate pruned deep neural network (DNN) models that
                 both are robust against adversarially generated images
                 and maintain high accuracy on clean images. In
                 particular, the disclosed DNR training method
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {52},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Paissan:2022:PSB,
  author =       {Francesco Paissan and Alberto Ancilotto and Elisabetta
                 Farella},
  title =        {{PhiNets}: a Scalable Backbone for Low-power {AI} at
                 the Edge},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {53:1--53:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3510832},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3510832},
  abstract =     {In the Internet of Things era, where we see many
                 interconnected and heterogeneous mobile and fixed smart
                 devices, distributing the intelligence from the cloud
                 to the edge has become a necessity. Due to limited
                 computational and communication capabilities,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {53},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Gomez:2022:DDP,
  author =       {Andres Gomez and Andreas Tretter and Pascal Alexander
                 Hager and Praveenth Sanmugarajah and Luca Benini and
                 Lothar Thiele},
  title =        {Dataflow Driven Partitioning of Machine Learning
                 Applications for Optimal Energy Use in Batteryless
                 Systems},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {54:1--54:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3520135},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3520135},
  abstract =     {Sensing systems powered by energy harvesting have
                 traditionally been designed to tolerate long periods
                 without energy. As the Internet of Things (IoT) evolves
                 toward a more transient and opportunistic execution
                 paradigm, reducing energy storage costs will \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {54},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Kutukcu:2022:CGA,
  author =       {Basar Kutukcu and Sabur Baidya and Anand Raghunathan
                 and Sujit Dey},
  title =        {Contention Grading and Adaptive Model Selection for
                 Machine Vision in Embedded Systems},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {55:1--55:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3520134},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3520134},
  abstract =     {Real-time machine vision applications running on
                 resource-constrained embedded systems face challenges
                 for maintaining performance. An especially challenging
                 scenario arises when multiple applications execute at
                 the same time, creating contention for the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {55},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Jokic:2022:CKE,
  author =       {Petar Jokic and Erfan Azarkhish and Andrea Bonetti and
                 Marc Pons and Stephane Emery and Luca Benini},
  title =        {A Construction Kit for Efficient Low Power Neural
                 Network Accelerator Designs},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {56:1--56:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3520127},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3520127},
  abstract =     {Implementing embedded neural network processing at the
                 edge requires efficient hardware acceleration that
                 combines high computational throughput with low power
                 consumption. Driven by the rapid evolution of network
                 architectures and their algorithmic \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {56},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Resch:2022:EER,
  author =       {Salonik Resch and S. Karen Khatamifard and Zamshed I.
                 Chowdhury and Masoud Zabihi and Zhengyang Zhao and
                 Husrev Cilasun and Jian-Ping Wang and Sachin S.
                 Sapatnekar and Ulya R. Karpuzcu},
  title =        {Energy-efficient and Reliable Inference in Nonvolatile
                 Memory under Extreme Operating Conditions},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {57:1--57:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3520130},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3520130},
  abstract =     {Beyond-edge devices can operate outside the reach of
                 the power grid and without batteries. Such devices can
                 be deployed in large numbers in regions that are
                 difficult to access. Using machine learning, these
                 devices can solve complex problems and relay \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {57},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Herzog:2022:RDE,
  author =       {Benedict Herzog and Stefan Reif and Judith Hemp and
                 Timo H{\"o}nig and Wolfgang Schr{\"o}der-Preikschat},
  title =        {Resource-demand Estimation for Edge Tensor Processing
                 Units},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {58:1--58:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3520132},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3520132},
  abstract =     {Machine learning has shown tremendous success in a
                 large variety of applications. The evolution of
                 machine-learning applications from cloud-based systems
                 to mobile and embedded devices has shifted the focus
                 from only quality-related aspects towards the
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {58},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Hemmat:2022:CCA,
  author =       {Maedeh Hemmat and Joshua {San Miguel} and Azadeh
                 Davoodi},
  title =        {{CAP'NN}: a Class-aware Framework for Personalized
                 Neural Network Inference},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {59:1--59:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3520126},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3520126},
  abstract =     {We propose a framework for Class-aware Personalized
                 Neural Network Inference (CAP'NN), which prunes an
                 already-trained neural network model based on the
                 preferences of individual users. Specifically, by
                 adapting to the subset of output classes that each
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {59},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Park:2022:QST,
  author =       {Jun-Hyung Park and Kang-Min Kim and Sangkeun Lee},
  title =        {Quantized Sparse Training: a Unified Trainable
                 Framework for Joint Pruning and Quantization in
                 {DNNs}},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {60:1--60:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3524066},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3524066},
  abstract =     {Deep neural networks typically have extensive
                 parameters and computational operations. Pruning and
                 quantization techniques have been widely used to reduce
                 the complexity of deep models. Both techniques can be
                 jointly used for realizing significantly \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {60},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Baharani:2022:ARE,
  author =       {Mohammadreza Baharani and Hamed Tabkhi},
  title =        {{ATCN}: Resource-efficient Processing of Time Series
                 on Edge},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {61:1--61:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3524070},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3524070},
  abstract =     {This article presents a scalable deep learning model
                 called Agile Temporal Convolutional Network (ATCN) for
                 highly accurate fast classification and time series
                 prediction in resource-constrained embedded systems.
                 ATCN is a family of compact networks with \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {61},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Goyal:2022:HFU,
  author =       {Vidushi Goyal and Reetuparna Das and Valeria
                 Bertacco},
  title =        {Hardware-friendly User-specific Machine Learning for
                 Edge Devices},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {62:1--62:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3524125},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3524125},
  abstract =     {Machine learning (ML) on resource-constrained edge
                 devices is expensive and often requires offloading
                 computation to the cloud, which may compromise the
                 privacy of user data. In contrast, the type of data
                 processed at edge devices is user-specific and
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {62},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{George:2022:UPE,
  author =       {Biji George and Om Ji Omer and Ziaul Choudhury and
                 {Anoop V} and Sreenivas Subramoney},
  title =        {A Unified Programmable Edge Matrix Processor for Deep
                 Neural Networks and Matrix Algebra},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {63:1--63:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3524453},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3524453},
  abstract =     {Matrix Algebra and Deep Neural Networks represent
                 foundational classes of computational algorithms across
                 multiple emerging applications like Augmented Reality
                 or Virtual Reality, autonomous navigation (cars,
                 drones, robots), data science, and various \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {63},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Bouzidi:2022:PMC,
  author =       {Halima Bouzidi and Hamza Ouarnoughi and Smail Niar and
                 Abdessamad {Ait El Cadi}},
  title =        {Performance Modeling of Computer Vision-based {CNN} on
                 Edge {GPUs}},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {64:1--64:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3527169},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3527169},
  abstract =     {Convolutional Neural Networks (CNNs) are currently
                 widely used in various fields, particularly for
                 computer vision applications. Edge platforms have drawn
                 tremendous attention from academia and industry due to
                 their ability to improve execution time and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {64},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Yuan:2022:MFC,
  author =       {Geng Yuan and Peiyan Dong and Mengshu Sun and Wei Niu
                 and Zhengang Li and Yuxuan Cai and Yanyu Li and Jun Liu
                 and Weiwen Jiang and Xue Lin and Bin Ren and Xulong
                 Tang and Yanzhi Wang},
  title =        {Mobile or {FPGA}? {A} Comprehensive Evaluation on
                 Energy Efficiency and a Unified Optimization
                 Framework},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {65:1--65:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3528578},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3528578},
  abstract =     {Efficient deployment of Deep Neural Networks (DNNs) on
                 edge devices (i.e., FPGAs and mobile platforms) is very
                 challenging, especially under a recent witness of the
                 increasing DNN model size and complexity. Model
                 compression strategies, including weight \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {65},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@Article{Ghasemi:2022:EEE,
  author =       {Mehdi Ghasemi and Daler Rakhmatov and Carole-Jean Wu
                 and Sarma Vrudhula},
  title =        {{EdgeWise}: Energy-efficient {CNN} Computation on Edge
                 Devices under Stochastic Communication Delays},
  journal =      j-TECS,
  volume =       {21},
  number =       {5},
  pages =        {66:1--66:??},
  month =        sep,
  year =         {2022},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3530908},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Sat Mar 11 08:39:21 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3530908},
  abstract =     {This article presents a framework to enable the
                 energy-efficient execution of convolutional neural
                 networks (CNNs) on edge devices. The framework consists
                 of a pair of edge devices connected via a wireless
                 network: a performance and energy-constrained
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {66},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {https://dl.acm.org/loi/tecs},
}

@article{Shafique:2022:ISIb,
  author          = {Muhammad Shafique and Theocharis Theocharides and Hai
                     (Helen) Li and Chun Jason Xue},
  title           = {Introduction to the Special Issue on Accelerating {AI}
                     on the Edge --- {Part 2}},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {67:1--67:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3563127},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3563127},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {67},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Chen:2022:ERD,
  author          = {Kuan-Hsun Chen and Chiahui Su and Christian Hakert and
                     Sebastian Buschj{\"a}ger and Chao-Lin Lee and Jenq-Kuen
                     Lee and Katharina Morik and Jian-Jia Chen},
  title           = {Efficient Realization of Decision Trees for Real-Time
                     Inference},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {68:1--68:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3508019},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3508019},
  abstract        = {For timing-sensitive edge applications, the demand for
                     efficient lightweight machine learning solutions has
                     increased recently. Tree ensembles are among the
                     state-of-the-art in many machine learning applications.
                     While single decision trees are comparably \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {68},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Pan:2022:BWH,
  author          = {Hongyi Pan and Diaa Badawi and Ahmet Enis Cetin},
  title           = {Block {Walsh-Hadamard} Transform-based Binary Layers
                     in Deep Neural Networks},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {69:1--69:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3510026},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3510026},
  abstract        = {Convolution has been the core operation of modern deep
                     neural networks. It is well known that convolutions can
                     be implemented in the Fourier Transform domain. In this
                     article, we propose to use binary block Walsh-Hadamard
                     transform (WHT) instead of the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {69},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Mukherjee:2022:AFD,
  author          = {Arijit Mukherjee and Jayeeta Mondal and Swarnava Dey},
  title           = {Accelerated Fire Detection and Localization at Edge},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {70:1--70:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3510027},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3510027},
  abstract        = {Fire-related incidents continue to be reported as a
                     leading cause of life and property destruction.
                     Automated fire detection and localization (AFDL)
                     systems have grown in importance with the evolution of
                     applied robotics, especially because use of robots
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {70},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Almeida:2022:DDO,
  author          = {Mario Almeida and Stefanos Laskaridis and Stylianos I.
                     Venieris and Ilias Leontiadis and Nicholas D. Lane},
  title           = {{DynO}: Dynamic Onloading of Deep Neural Networks from
                     Cloud to Device},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {71:1--71:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3510831},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3510831},
  abstract        = {Recently, there has been an explosive growth of mobile
                     and embedded applications using convolutional neural
                     networks (CNNs). To alleviate their excessive
                     computational demands, developers have traditionally
                     resorted to cloud offloading, inducing high \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {71},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Ganesan:2022:DST,
  author          = {Vinod Ganesan and Pratyush Kumar},
  title           = {Design and Scaffolded Training of an Efficient {DNN}
                     Operator for Computer Vision on the Edge},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {72:1--72:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3511212},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3511212},
  abstract        = {Massively parallel systolic arrays and
                     resource-efficient depthwise separable convolutions are
                     two promising hardware and software techniques to
                     accelerate DNN inference on the edge. Interestingly,
                     their combination is inefficient: Computational
                     patterns \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {72},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Shahhosseini:2022:OLO,
  author          = {Sina Shahhosseini and Dongjoo Seo and Anil Kanduri and
                     Tianyi Hu and Sung-Soo Lim and Bryan Donyanavard and
                     Amir M. Rahmani and Nikil Dutt},
  title           = {Online Learning for Orchestration of Inference in
                     Multi-user End-edge-cloud Networks},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {73:1--73:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520129},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520129},
  abstract        = {Deep-learning-based intelligent services have become
                     prevalent in cyber-physical applications, including
                     smart cities and health-care. Deploying
                     deep-learning-based intelligence near the end-user
                     enhances privacy protection, responsiveness, and
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {73},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Tsouvalas:2022:FST,
  author          = {Vasileios Tsouvalas and Aaqib Saeed and Tanir
                     Ozcelebi},
  title           = {Federated Self-training for Semi-supervised Audio
                     Recognition},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {74:1--74:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520128},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520128},
  abstract        = {Federated Learning is a distributed machine learning
                     paradigm dealing with decentralized and personal
                     datasets. Since data reside on devices such as
                     smartphones and virtual assistants, labeling is
                     entrusted to the clients or labels are extracted in an
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {74},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Lemaire:2022:SAH,
  author          = {Edgar Lemaire and Beno{\^\i}t Miramond and
                     S{\'e}bastien Bilavarn and Hadi Saoud and Nassim
                     Abderrahmane},
  title           = {Synaptic Activity and Hardware Footprint of Spiking
                     Neural Networks in Digital Neuromorphic Systems},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {75:1--75:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520133},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520133},
  abstract        = {Spiking neural networks are expected to bring high
                     resources, power, and energy efficiency to machine
                     learning hardware implementations. In this regard, they
                     could facilitate the integration of Artificial
                     Intelligence in highly constrained embedded \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {75},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Yang:2022:DDC,
  author          = {Yi Yang and Murugan Sankaradas and Srimat Chakradhar},
  title           = {{DyCo}: Dynamic, Contextualized {AI} Models},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {76:1--76:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520131},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520131},
  abstract        = {Devices with limited computing resources use smaller
                     AI models to achieve low-latency inferencing. However,
                     model accuracy is typically much lower than the
                     accuracy of a bigger model that is trained and deployed
                     in places where the computing resources are \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {76},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Song:2022:DTC,
  author          = {Shihao Song and Adarsha Balaji and Anup Das and
                     Nagarajan Kandasamy},
  title           = {Design-Technology Co-Optimization for {NVM-Based}
                     Neuromorphic Processing Elements},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {77:1--77:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3524068},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3524068},
  abstract        = {An emerging use case of machine learning (ML) is to
                     train a model on a high-performance system and deploy
                     the trained model on energy-constrained embedded
                     systems. Neuromorphic hardware platforms, which operate
                     on principles of the biological brain, can \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {77},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

%%% NOTE(review): fourth author changed from "Mohen Imani" to "Mohsen
%%% Imani" (apparent typo in the imported metadata); confirm against the
%%% article's title page.
@Article{Morris:2022:HUH,
  author =       "Justin Morris and Kazim Ergun and Behnam Khaleghi and
                 Mohsen Imani and Baris Aksanli and Tajana Simunic",
  title =        "{HyDREA}: Utilizing Hyperdimensional Computing for a
                 More Robust and Efficient Machine Learning System",
  journal =      j-TECS,
  volume =       "21",
  number =       "6",
  pages =        "78:1--78:??",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3524067",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:23 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3524067",
  abstract =     "Today's systems rely on sending all the data to the
                 cloud and then using complex algorithms, such as Deep
                 Neural Networks, which require billions of parameters
                 and many hours to train a model. In contrast, the human
                 brain can do much of this learning \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Khan:2022:BIC,
  author          = {Asif Ali Khan and S{\'e}bastien Ollivier and Stephen
                     Longofono and Gerald Hempel and Jeronimo Castrillon and
                     Alex K. Jones},
  title           = {Brain-inspired Cognition in Next-generation Racetrack
                     Memories},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {79:1--79:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3524071},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3524071},
  abstract        = {Hyperdimensional computing (HDC) is an emerging
                     computational framework inspired by the brain that
                     operates on vectors with thousands of dimensions to
                     emulate cognition. Unlike conventional computational
                     frameworks that operate on numbers, HDC, like the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {79},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Alam:2022:WCD,
  author          = {Syed Asad Alam and Andrew Anderson and Barbara
                     Barabasz and David Gregg},
  title           = {{Winograd} Convolution for Deep Neural Networks:
                     Efficient Point Selection},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {80:1--80:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3524069},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3524069},
  abstract        = {Convolutional neural networks (CNNs) have dramatically
                     improved the accuracy of image, video, and audio
                     processing for tasks such as object recognition, image
                     segmentation, and interactive speech systems. CNNs
                     require large amounts of computing resources \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {80},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Hassantabar:2022:MMH,
  author          = {Shayan Hassantabar and Joe Zhang and Hongxu Yin and
                     Niraj K. Jha},
  title           = {{MHDeep}: Mental Health Disorder Detection System
                     Based on Wearable Sensors and Artificial Neural
                     Networks},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {81:1--81:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3527170},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3527170},
  abstract        = {Mental health problems impact the quality of life of
                     millions of people around the world. However, diagnosis
                     of mental health disorders is a challenging problem
                     that often relies on self-reporting by patients about
                     their behavioral patterns and social \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {81},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{HeydariGorji:2022:LCS,
  author          = {Ali HeydariGorji and Siavash Rezaei and Mahdi
                     Torabzadehkashi and Hossein Bobarshad and Vladimir
                     Alves and Pai H. Chou},
  title           = {Leveraging Computational Storage for Power-Efficient
                     Distributed Data Analytics},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {82:1--82:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3528577},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3528577},
  abstract        = {This article presents a family of computational
                     storage drives (CSDs) and demonstrates their
                     performance and power improvements due to in-storage
                     processing (ISP) when running big data analytics
                     applications. CSDs are an emerging class of solid state
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {82},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

%%% The "2D/3D" in the title is kept in ONE brace group, so that no
%%% interword space is typeset after the slash.
@Article{Li:2022:FDN,
  author =       "Shuwei Li and Changhai Man and Ao Shen and Ziyi Guan
                 and Wei Mao and Shaobo Luo and Rumin Zhang and Hao Yu",
  title =        "A Fall Detection Network by {$2$D\slash $3$D}
                 Spatio-temporal Joint Models with Tensor Compression on
                 Edge",
  journal =      j-TECS,
  volume =       "21",
  number =       "6",
  pages =        "83:1--83:??",
  month =        nov,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3531004",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:23 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3531004",
  abstract =     "Falling is ranked highly among the threats in elderly
                 healthcare, which promotes the development of automatic
                 fall detection systems with extensive concern. With the
                 fast development of the Internet of Things (IoT) and
                 Artificial Intelligence (AI), camera \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "83",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Soliman:2022:FFF,
  author          = {Taha Soliman and Nellie Laleni and Tobias Kirchner and
                     Franz M{\"u}ller and Ashish Shrivastava and Thomas
                     K{\"a}mpfe and Andre Guntoro and Norbert Wehn},
  title           = {{FELIX}: a Ferroelectric {FET} Based Low Power
                     Mixed-Signal In-Memory Architecture for {DNN}
                     Acceleration},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {84:1--84:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3529760},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3529760},
  abstract        = {Today, a large number of applications depend on deep
                     neural networks (DNN) to process data and perform
                     complicated tasks at restricted power and latency
                     specifications. Therefore, processing-in-memory (PIM)
                     platforms are actively explored as a promising
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {84},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Leite:2022:REC,
  author          = {Clayton Frederick Souza Leite and Yu Xiao},
  title           = {Resource-Efficient Continual Learning for Sensor-Based
                     Human Activity Recognition},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {85:1--85:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3530910},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3530910},
  abstract        = {Recent advances in deep learning have granted
                     unrivaled performance to sensor-based human activity
                     recognition (HAR). However, in a real-world scenario,
                     the HAR solution is subject to diverse changes over
                     time such as the need to learn new activity \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {85},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Pal:2022:OEI,
  author          = {Subhankar Pal and Swagath Venkataramani and Viji
                     Srinivasan and Kailash Gopalakrishnan},
  title           = {{OnSRAM}: Efficient Inter-Node On-Chip Scratchpad
                     Management in Deep Learning Accelerators},
  journal         = j-TECS,
  volume          = {21},
  number          = {6},
  pages           = {86:1--86:??},
  month           = nov,
  year            = {2022},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3530909},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:23 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3530909},
  abstract        = {Hardware acceleration of Artificial Intelligence (AI)
                     workloads has gained widespread popularity with its
                     potential to deliver unprecedented performance and
                     efficiency. An important challenge remains in how AI
                     accelerators are programmed to sustain high \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {86},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Cai:2023:OOF,
  author          = {Xuyi Cai and Ying Wang and Lei Zhang},
  title           = {{Optimus}: an Operator Fusion Framework for Deep
                     Neural Networks},
  journal         = j-TECS,
  volume          = {22},
  number          = {1},
  pages           = {1:1--1:??},
  month           = jan,
  year            = {2023},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3520142},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:25 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3520142},
  abstract        = {The reduction of neural parameters and operations for
                     the applications on embedded and IoT platforms in
                     current deep neural network (DNN) architectures has
                     received increasing attention. Relatively, the
                     intermediate feature maps of such lightweight neural
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {1},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Oh:2023:MFL,
  author          = {Deok-Jae Oh and Yaebin Moon and Do Kyu Ham and Tae Jun
                     Ham and Yongjun Park and Jae W. Lee and Jung Ho Ahn and
                     Eojin Lee},
  title           = {{MaPHeA}: a Framework for Lightweight Memory
                     Hierarchy-aware Profile-guided Heap Allocation},
  journal         = j-TECS,
  volume          = {22},
  number          = {1},
  pages           = {2:1--2:??},
  month           = jan,
  year            = {2023},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3527853},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:25 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3527853},
  abstract        = {Hardware performance monitoring units (PMUs) are a
                     standard feature in modern microprocessors, providing a
                     rich set of microarchitectural event samplers.
                     Recently, numerous profile-guided optimization (PGO)
                     frameworks have exploited them to feature much
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {2},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Monniaux:2023:FVL,
  author          = {David Monniaux and Cyril Six},
  title           = {Formally Verified Loop-Invariant Code Motion and
                     Assorted Optimizations},
  journal         = j-TECS,
  volume          = {22},
  number          = {1},
  pages           = {3:1--3:??},
  month           = jan,
  year            = {2023},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3529507},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:25 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3529507},
  abstract        = {We present an approach for implementing a formally
                     certified loop-invariant code motion optimization by
                     composing an unrolling pass and a formally certified
                     yet efficient global subexpression elimination. This
                     approach is lightweight: each pass comes with
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {3},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@article{Wen:2023:WCP,
  author          = {Elliott Wen and Gerald Weber and Suranga Nanayakkara},
  title           = {{WasmAndroid}: a Cross-Platform Runtime for Native
                     Programming Languages on {Android}},
  journal         = j-TECS,
  volume          = {22},
  number          = {1},
  pages           = {4:1--4:??},
  month           = jan,
  year            = {2023},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3530286},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Sat Mar 11 08:39:25 MST 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url             = {https://dl.acm.org/doi/10.1145/3530286},
  abstract        = {Open source hardware such as RISC-V has been gaining
                     substantial momentum. Recently, they have begun to
                     embrace Google's Android operating system to leverage
                     its software ecosystem. Despite the encouraging
                     progress, a challenging issue arises: a majority
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {4},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
}

@Article{Chen:2023:FNN,
  author =       "Weiwei Chen and Ying Wang and Ying Xu and Chengsi Gao
                 and Cheng Liu and Lei Zhang",
  title =        "A Framework for Neural Network Architecture and
                 Compile Co-optimization",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3533251",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3533251",
  abstract =     "The efficiency of deep neural network (DNN) solutions
                 on real hardware devices are mainly decided by the DNN
                 architecture and the compiler-level scheduling strategy
                 on the hardware. When we try to fully exploit the
                 underlying hardware and obtain the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Young:2023:CAD,
  author =       "May Young and Alan J. Hu and Guy G. F. Lemieux",
  title =        "Cache Abstraction for Data Race Detection in
                 Heterogeneous Systems with Non-coherent Accelerators",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3535457",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3535457",
  abstract =     "Embedded systems are becoming increasingly complex and
                 heterogeneous, featuring multiple processor cores
                 (which might themselves be heterogeneous) as well as
                 specialized hardware accelerators, all accessing shared
                 memory. Many accelerators are non-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Brilli:2023:ECM,
  author =       "Gianluca Brilli and Roberto Cavicchioli and Marco
                 Solieri and Paolo Valente and Andrea Marongiu",
  title =        "Evaluating Controlled Memory Request Injection for
                 Efficient Bandwidth Utilization and Predictable
                 Execution in Heterogeneous {SoCs}",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3548773",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3548773",
  abstract =     "High-performance embedded platforms are increasingly
                 adopting heterogeneous systems-on-chip (HeSoC) that
                 couple multi-core CPUs with accelerators such as GPU,
                 FPGA, or AI engines. Adopting HeSoCs in the context of
                 real-time workloads is not immediately \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Das:2023:EFS,
  author =       "Satyajit Das and Kevin Martin and Thomas Peyret and
                 Philippe Coussy",
  title =        "An Efficient and Flexible Stochastic {CGRA} Mapping
                 Approach",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3550071",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3550071",
  abstract =     "Coarse-Grained Reconfigurable Array (CGRA)
                 architectures are promising high-performance and
                 power-efficient platforms. However, mapping
                 applications efficiently on CGRA is a challenging task.
                 This is known to be an NP complete problem. Hence,
                 finding good \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Saberi:2023:POT,
  author =       "Iman Saberi and Fathiyeh Faghih and Farzad Sobhi
                 Bavil",
  title =        "A Passive Online Technique for Learning Hybrid
                 Automata from {Input\slash Output} Traces",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3556543",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3556543",
  abstract =     "Specification synthesis is the process of deriving a
                 model from the input-output traces of a system. It is
                 used extensively in test design, reverse engineering,
                 and system identification. One type of the resulting
                 artifact of this process for cyber-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Cleaveland:2023:FVN,
  author =       "Rachel Cleaveland and Stefan Mitsch and Andr{\'e}
                 Platzer",
  title =        "Formally Verified Next-generation Airborne Collision
                 Avoidance Games in {ACAS X}",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544970",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544970",
  abstract =     "The design of aircraft collision avoidance algorithms
                 is a subtle but important challenge that merits the
                 need for provable safety guarantees. Obtaining such
                 guarantees is nontrivial given the unpredictability of
                 the interplay of the intruder aircraft \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mondal:2023:HTD,
  author =       "Anindan Mondal and Shubrojyoti Karmakar and Mahabub
                 Hasan Mahalat and Suchismita Roy and Bibhash Sen and
                 Anupam Chattopadhyay",
  title =        "Hardware {Trojan} Detection using Transition
                 Probability with Minimal Test Vectors",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3545000",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3545000",
  abstract =     "Hardware Trojans (HTs) are malicious manipulations of
                 the standard functionality of an integrated circuit
                 (IC). Sophisticated defense against HT attacks has
                 become the utmost current research endeavor. In
                 particular, the HTs whose operations depend on the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Fradet:2023:RRD,
  author =       "Pascal Fradet and Alain Girault and Ruby Krishnaswamy
                 and Xavier Nicollin and Arash Shafiei",
  title =        "{RDF}: a Reconfigurable Dataflow Model of
                 Computation",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544972",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544972",
  abstract =     "Dataflow Models of Computation (MoCs) are widely used
                 in embedded systems, including multimedia processing,
                 digital signal processing, telecommunications, and
                 automatic control. In a dataflow MoC, an application is
                 specified as a graph of actors connected \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Rodionova:2023:TRT,
  author =       "Alena Rodionova and Lars Lindemann and Manfred Morari
                 and George Pappas",
  title =        "Temporal Robustness of Temporal Logic Specifications:
                 Analysis and Control Design",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3550072",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3550072",
  abstract =     "We study the temporal robustness of temporal logic
                 specifications and show how to design temporally robust
                 control laws for time-critical control systems. This
                 topic is of particular interest in connected systems
                 and interleaving processes such as multi-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Xu:2023:LVL,
  author =       "Zirui Xu and Fuxun Yu and Chenchen Liu and Xiang
                 Chen",
  title =        "{LanCeX}: a Versatile and Lightweight Defense Method
                 against Condensed Adversarial Attacks in Image and
                 Audio Recognition",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3555375",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3555375",
  abstract =     "Convolutional Neural Networks (CNNs) are widely
                 deployed in various embedded recognition applications.
                 However, they demonstrate a considerable vulnerability
                 to adversarial attacks, which leverage the
                 well-designed perturbations to mislead the recognition
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huang:2023:DEL,
  author =       "Wenbo Huang and Lei Zhang and Shuoyuan Wang and Hao Wu
                 and Aiguo Song",
  title =        "Deep Ensemble Learning for Human Activity Recognition
                 Using Wearable Sensors via Filter Activation",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3551486",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3551486",
  abstract =     "During the past decade, human activity recognition
                 (HAR) using wearable sensors has become a new research
                 hot spot due to its extensive use in various
                 application domains such as healthcare, fitness, smart
                 homes, and eldercare. Deep neural networks, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hessien:2023:PPS,
  author =       "Salah Hessien and Mohamed Hassan",
  title =        "{PISCOT}: a Pipelined Split-Transaction
                 {COTS-Coherent} Bus for Multi-Core Real-Time Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3556975",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3556975",
  abstract =     "Tasks in modern embedded systems such as automotive
                 and avionics communicate among each other using shared
                 data towards achieving the desired functionality of the
                 whole system. In commodity platforms, cores communicate
                 data through the shared memory \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yeh:2023:WRR,
  author =       "Po-Chen Yeh and Chin-Hsien Wu and Yung-Hsiang Lin and
                 Ming-Yan Wu",
  title =        "A Write-Related and Read-Related {DRAM} Allocation
                 Strategy Inside Solid-State Drives {(SSDs)}",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3561301",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3561301",
  abstract =     "Although NAND flash memory has the advantages of small
                 size, low-power consumption, shock resistance, and fast
                 access speed, NAND flash memory still faces the
                 problems of ``out-of-place updates,'' ``garbage
                 collection,'' and ``unbalanced execution time'' due to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ali:2023:ESE,
  author =       "Ali J. {Ben Ali} and Marziye Kouroshli and Sofiya
                 Semenova and Zakieh Sadat Hashemifar and Steven Y. Ko
                 and Karthik Dantu",
  title =        "{Edge-SLAM}: Edge-Assisted Visual Simultaneous
                 Localization and Mapping",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3561972",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3561972",
  abstract =     "Localization in urban environments is becoming
                 increasingly important and used in tools such as ARCore
                 [18], ARKit [34] and others. One popular mechanism
                 to achieve accurate indoor localization and a map of
                 the space is using Visual Simultaneous \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Koh:2023:PST,
  author =       "Jaime Koh and Bruno Bodin",
  title =        "{$K$}-Periodic Scheduling for Throughput-Buffering
                 Trade-Off Exploration of {CSDF}",
  journal =      j-TECS,
  volume =       "22",
  number =       "1",
  pages =        "19:1--19:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3559760",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3559760",
  abstract =     "The design of time-critical embedded systems often
                 requires static models of computation such as
                 cyclo-static dataflow. These models enable performance
                 guarantees, execution correctness, and optimized memory
                 usage. Nonetheless, determining optimal buffer
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ernst:2023:ACN,
  author =       "Rolf Ernst and Dominik St{\"o}hrmann and Alex Bendrick
                 and Adam Kostrzewa",
  title =        "Application-centric Network Management --- Addressing
                 Safety and Real-time in {V2X} Applications",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3528411",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3528411",
  abstract =     "The current roadmaps and surveys for future wireless
                 networking typically focus on communication and
                 networking technologies and use representative
                 applications to derive future network requirements.
                 Such a benchmarking approach, however, does not cover
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pujol:2023:VEC,
  author =       "Roger Pujol and Josep Jorba and Hamid Tabani and
                 Leonidas Kosmidis and Enrico Mezzetti and Jaume Abella
                 and Francisco Cazorla",
  title =        "Vector Extensions in {COTS} Processors to Increase
                 Guaranteed Performance in Real-Time Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3561054",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3561054",
  abstract =     "The need for increased application performance in
                 high-integrity systems such as those in avionics is on
                 the rise as software continues to implement more
                 complex functionalities. The prevalent computing
                 solution for future high-integrity embedded products
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sarwar:2023:CPE,
  author =       "Mir Sarwar and Rajarshi Ray and Ansuman Banerjee",
  title =        "A Contrastive Plan Explanation Framework for Hybrid
                 System Models",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "22:1--22:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3561532",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3561532",
  abstract =     "In artificial intelligence planning, having an
                 explanation of a plan given by a planner is often
                 desirable. The ability to explain various aspects of a
                 synthesized plan to an end user not only brings in
                 trust on the planner but also reveals insights of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Erata:2023:EEA,
  author =       "Ferhat Erata and Eren Yildiz and Arda Goknil and Kasim
                 Sinan Yildirim and Jakub Szefer and Ruzica Piskac and
                 Gokcin Sezgin",
  title =        "{ETAP}: Energy-aware Timing Analysis of Intermittent
                 Programs",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "23:1--23:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3563216",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563216",
  abstract =     "Energy harvesting battery-free embedded devices rely
                 only on ambient energy harvesting that enables
                 stand-alone and sustainable IoT applications. These
                 devices execute programs when the harvested ambient
                 energy in their energy reservoir is sufficient to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gkeka:2023:RSC,
  author =       "Maria Rafaela Gkeka and Alexandros Patras and Nikolaos
                 Tavoularis and Stylianos Piperakis and Emmanouil
                 Hourdakis and Panos Trahanias and Christos D.
                 Antonopoulos and Spyros Lalis and Nikolaos Bellas",
  title =        "Reconfigurable System-on-Chip Architectures for Robust
                 Visual {SLAM} on Humanoid Robots",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "24:1--24:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3570210",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3570210",
  abstract =     "Visual Simultaneous Localization and Mapping (vSLAM)
                 is the method of employing an optical sensor to map the
                 robot's observable surroundings while also identifying
                 the robot's pose in relation to that map. The accuracy
                 and speed of vSLAM calculations can \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hu:2023:HPI,
  author =       "Xinyi Hu and Debiao He and Min Luo and Cong Peng and
                 Qi Feng and Xinyi Huang",
  title =        "High-Performance Implementation of the Identity-Based
                 Signature Scheme in {IEEE P1363} on {GPU}",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "25:1--25:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3564784",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564784",
  abstract =     "Identity-based cryptography is proposed to solve the
                 complicated certificate management of traditional
                 public-key cryptography. The pairing computation and
                 high-level tower extension field arithmetic turn out to
                 be the performance bottleneck of pairing-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Kundu:2023:SRB,
  author =       "Atanu Kundu and Sarthak Das and Rajarshi Ray",
  title =        "{SAT-Reach}: a Bounded Model Checker for Affine Hybrid
                 Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "26:1--26:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3567425",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3567425",
  abstract =     "Bounded model checking (BMC) is well-known to be
                 undecidable even for simple hybrid systems. Existing
                 work targeted for a wide class of non-linear hybrid
                 systems reduces the BMC problem to the satisfiability
                 problem of an satisfiability modulo theory \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ogras:2023:ISI,
  author =       "Umit Y. Ogras and Radu Marculescu and Trevor N. Mudge
                 and Michael Kishinevsky",
  title =        "Introduction to the Special Issue on Domain-Specific
                 System-on-Chip Architectures and Run-Time Management
                 Techniques",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "27:1--27:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3567834",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3567834",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Krishnakumar:2023:DSA,
  author =       "Anish Krishnakumar and Umit Ogras and Radu Marculescu
                 and Mike Kishinevsky and Trevor Mudge",
  title =        "Domain-Specific Architectures: Research Problems and
                 Promising Approaches",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "28:1--28:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3563946",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563946",
  abstract =     "Process technology-driven performance and energy
                 efficiency improvements have slowed down as we approach
                 physical design limits. General-purpose manycore
                 architectures attempt to circumvent this challenge, but
                 they have a significant performance and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2023:EDS,
  author =       {Yueting Li and Wang Kang and Kunyu Zhou and Keni Qiu
                 and Weisheng Zhao},
  title =        {Experimental Demonstration of {STT-MRAM}-based
                 Nonvolatile Instantly On\slash Off System for {IoT}
                 Applications: Case Studies},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {2},
  articleno =    {29},
  pages =        {29:1--29:??},
  month =        mar,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3546193},
  URL =          {https://dl.acm.org/doi/10.1145/3546193},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {Energy consumption has been a big challenge for
                 electronic devices, particularly for battery-powered
                 Internet of Things (IoT) equipment. To address such a
                 challenge, on the one hand, low-power electronic design
                 methodologies and novel power management \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Sat Mar 11 08:39:26 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

%%% The last author's surname is spelled ``Gonzalez''; an earlier
%%% revision of this entry had two letters transposed in that name.
@Article{Aminabadi:2023:SAE,
  author =       "Reza Yazdani Aminabadi and Olatunji Ruwase and Minjia
                 Zhang and Yuxiong He and Jose-Maria Arnau and Antonio
                 Gonzalez",
  title =        "{SHARP}: an Adaptable, Energy-Efficient Accelerator
                 for Recurrent Neural Networks",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "30:1--30:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3552513",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3552513",
  abstract =     "The effectiveness of Recurrent Neural Networks (RNNs)
                 for tasks such as Automatic Speech Recognition has
                 fostered interest in RNN inference acceleration. Due to
                 the recurrent nature and data dependencies of RNN
                 computations, prior work has designed \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Boroujerdian:2023:FES,
  author =       {Behzad Boroujerdian and Ying Jing and Devashree
                 Tripathy and Amit Kumar and Lavanya Subramanian and
                 Luke Yen and Vincent Lee and Vivek Venkatesan and Amit
                 Jindal and Robert Shearer and Vijay Janapa Reddi},
  title =        {{FARSI}: an Early-stage Design Space Exploration
                 Framework to Tame the Domain-specific System-on-chip
                 Complexity},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {2},
  articleno =    {31},
  pages =        {31:1--31:??},
  month =        mar,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3544016},
  URL =          {https://dl.acm.org/doi/10.1145/3544016},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {Domain-specific SoCs (DSSoCs) are an attractive
                 solution for domains with extremely stringent power,
                 performance, and area constraints. However, DSSoCs
                 suffer from two fundamental complexities. On the one
                 hand, their many specialized hardware blocks \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Sat Mar 11 08:39:26 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Brumar:2023:EDA,
  author =       {Iulian Brumar and Georgios Zacharopoulos and Yuan Yao
                 and Saketh Rama and David Brooks and Gu-Yeon Wei},
  title =        {Early {DSE} and Automatic Generation of Coarse-grained
                 Merged Accelerators},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {2},
  articleno =    {32},
  pages =        {32:1--32:??},
  month =        mar,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3546070},
  URL =          {https://dl.acm.org/doi/10.1145/3546070},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {Post-Moore's law area-constrained systems rely on
                 accelerators to deliver performance enhancements.
                 Coarse-grained accelerators can offer substantial
                 domain acceleration, but manual, ad hoc identification
                 of code to accelerate is prohibitively expensive.
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Sat Mar 11 08:39:26 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Inci:2023:QFQ,
  author =       {Ahmet Inci and Siri Virupaksha and Aman Jain and
                 Ting-Wu Chin and Venkata Thallam and Ruizhou Ding and
                 Diana Marculescu},
  title =        {{QUIDAM}: a Framework for Quantization-aware {DNN}
                 Accelerator and Model Co-Exploration},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {2},
  articleno =    {33},
  pages =        {33:1--33:??},
  month =        mar,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3555807},
  URL =          {https://dl.acm.org/doi/10.1145/3555807},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {As the machine learning and systems communities strive
                 to achieve higher energy efficiency through custom deep
                 neural network (DNN) accelerators, varied precision or
                 quantization levels, and model compression techniques,
                 there is a need for design space \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Sat Mar 11 08:39:26 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Ahangari:2023:HBH,
  author =       {Hamzeh Ahangari and Muhammet Mustafa {\"O}zdal and
                 {\"O}zcan {\"O}zt{\"u}rk},
  title =        {{HLS}-based High-throughput and Work-efficient
                 Synthesizable Graph Processing Template Pipeline},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {2},
  articleno =    {34},
  pages =        {34:1--34:??},
  month =        mar,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3529256},
  URL =          {https://dl.acm.org/doi/10.1145/3529256},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {Hardware systems composed of diverse execution
                 resources are being deployed to cope with the
                 complexity and performance requirements of Artificial
                 Intelligence (AI) and Machine Learning (ML)
                 applications. With the emergence of new hardware
                 platforms, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Sat Mar 11 08:39:26 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Koul:2023:AAA,
  author =       {Kalhan Koul and Jackson Melchert and Kavya Sreedhar
                 and Leonard Truong and Gedeon Nyengele and Keyi Zhang
                 and Qiaoyi Liu and Jeff Setter and Po-Han Chen and
                 Yuchen Mei and Maxwell Strange and Ross Daly and Caleb
                 Donovick and Alex Carsello and Taeyoung Kong and
                 Kathleen Feng and Dillon Huff and Ankita Nayak and
                 Rajsekhar Setaluri and James Thomas and Nikhil
                 Bhagdikar and David Durst and Zachary Myers and Nestan
                 Tsiskaridze and Stephen Richardson and Rick Bahr and
                 Kayvon Fatahalian and Pat Hanrahan and Clark Barrett
                 and Mark Horowitz and Christopher Torng and Fredrik
                 Kjolstad and Priyanka Raina},
  title =        {{AHA}: an Agile Approach to the Design of
                 Coarse-Grained Reconfigurable Accelerators and
                 Compilers},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {2},
  articleno =    {35},
  pages =        {35:1--35:??},
  month =        mar,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3534933},
  URL =          {https://dl.acm.org/doi/10.1145/3534933},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {With the slowing of Moore's law, computer architects
                 have turned to domain-specific hardware specialization
                 to continue improving the performance and efficiency of
                 computing systems. However, specialization typically
                 entails significant modifications to \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Sat Mar 11 08:39:26 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

%%% The abstract is a publisher teaser that ends in an ellipsis; the
%%% final word before the ellipsis is given in full (``plug-and-play'')
%%% rather than as a dangling hyphenated fragment.
@Article{Mack:2023:CCI,
  author =       "Joshua Mack and Sahil Hassan and Nirmal Kumbhare and
                 Miguel Castro Gonzalez and Ali Akoglu",
  title =        "{CEDR}: a Compiler-integrated, Extensible {DSSoC}
                 Runtime",
  journal =      j-TECS,
  volume =       "22",
  number =       "2",
  pages =        "36:1--36:??",
  month =        mar,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3529257",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 11 08:39:26 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3529257",
  abstract =     "In this work, we present a Compiler-integrated,
                 Extensible Domain Specific System on Chip Runtime
                 (CEDR) ecosystem to facilitate research toward
                 addressing the challenges of architecture, system
                 software, and application development with distinct
                 plug-and-play \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chen:2023:ARL,
  author =       {Huili Chen and Xinqiao Zhang and Ke Huang and Farinaz
                 Koushanfar},
  title =        {{AdaTest}: Reinforcement Learning and Adaptive
                 Sampling for On-chip Hardware {Trojan} Detection},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {2},
  articleno =    {37},
  pages =        {37:1--37:??},
  month =        mar,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3544015},
  URL =          {https://dl.acm.org/doi/10.1145/3544015},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {This paper proposes AdaTest, a novel adaptive test
                 pattern generation framework for efficient and reliable
                 Hardware Trojan (HT) detection. HT is a backdoor attack
                 that tampers with the design of victim integrated
                 circuits (ICs). AdaTest improves the \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Sat Mar 11 08:39:26 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Anderson:2023:VPM,
  author =       {Jeff Anderson and Engin Kayraklioglu and Hamid Reza
                 Imani and Chen Shen and Mario Miscuglio and Volker J.
                 Sorger and Tarek El-Ghazawi},
  title =        {Virtualizing a Post-{Moore}'s Law Analog Mesh
                 Processor: The Case of a Photonic {PDE} Accelerator},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {2},
  articleno =    {38},
  pages =        {38:1--38:??},
  month =        mar,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3544971},
  URL =          {https://dl.acm.org/doi/10.1145/3544971},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {Innovative processor architectures aim to play a
                 critical role in future sustainment of performance
                 improvements under severe limitations imposed by the
                 end of Moore's Law. The Reconfigurable Optical Computer
                 (ROC) is one such innovative, Post-Moore's Law
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Sat Mar 11 08:39:26 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{S:2023:PQA,
  author =       {Aswathy N. S. and Arnab Sarkar and Hemangee Kapoor},
  title =        {A Predictable {QoS}-aware Memory Request Scheduler for
                 Soft Real-time Systems},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {2},
  articleno =    {39},
  pages =        {39:1--39:??},
  month =        mar,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3561052},
  URL =          {https://dl.acm.org/doi/10.1145/3561052},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {A memory controller manages the flow of data to and
                 from attached memory devices. The order in which a set
                 of contending memory requests from different tasks are
                 serviced significantly influences the rate of progress
                 and completion times of these tasks. \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Sat Mar 11 08:39:26 MST 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Sifakis:2023:TAS,
  author =       {Joseph Sifakis and David Harel},
  title =        {Trustworthy Autonomous System Development},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {3},
  articleno =    {40},
  pages =        {40:1--40:??},
  month =        may,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3545178},
  URL =          {https://dl.acm.org/doi/10.1145/3545178},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {Autonomous systems emerge from the need to
                 progressively replace human operators by autonomous
                 agents in a wide variety of application areas. We offer
                 an analysis of the state of the art in developing
                 autonomous systems, focusing on design and validation
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Shrivastava:2023:ATS,
  author =       {Aviral Shrivastava and Jian-Jia Chen and Akash Kumar
                 and Anup Das},
  title =        {{ACM TECS} Special Issue on Embedded System Security
                 Tutorials},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {3},
  articleno =    {41},
  pages =        {41:1--41:??},
  month =        may,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3594872},
  URL =          {https://dl.acm.org/doi/10.1145/3594872},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Chen:2023:TTR,
  author =       {Huili Chen and Farinaz Koushanfar},
  title =        {Tutorial: Toward Robust Deep Learning against
                 Poisoning Attacks},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {3},
  articleno =    {42},
  pages =        {42:1--42:??},
  month =        may,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3574159},
  URL =          {https://dl.acm.org/doi/10.1145/3574159},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {Deep Learning (DL) has been increasingly deployed in
                 various real-world applications due to its
                 unprecedented performance and automated capability of
                 learning hidden representations. While DL can achieve
                 high task performance, the training process of a DL
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Feldtkeller:2023:COS,
  author =       {Jakob Feldtkeller and Pascal Sasdrich and Tim
                 G{\"u}neysu},
  title =        {Challenges and Opportunities of Security-Aware {EDA}},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {3},
  articleno =    {43},
  pages =        {43:1--43:??},
  month =        may,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3576199},
  URL =          {https://dl.acm.org/doi/10.1145/3576199},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {The foundation of every digital system is based on
                 hardware in which security, as a core service of many
                 applications, should be deeply embedded. Unfortunately,
                 the knowledge of system security and efficient hardware
                 design is spread over different \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Rautakoura:2023:DSH,
  author =       {Antti Rautakoura and Timo H{\"a}m{\"a}l{\"a}inen},
  title =        {Does {SoC} Hardware Development Become Agile by Saying
                 So: a Literature Review and Mapping Study},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {3},
  articleno =    {44},
  pages =        {44:1--44:??},
  month =        may,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3578554},
  URL =          {https://dl.acm.org/doi/10.1145/3578554},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {The success of agile development methods in software
                 development has raised interest in System-on-Chip (SoC)
                 design, which involves high architectural and
                 development process complexity under time and project
                 management pressure. This article discovers \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

%%% The abstract is a publisher teaser that ends in an ellipsis; the
%%% final word before the ellipsis is given in full (``re-evaluate'')
%%% rather than as a dangling hyphenated fragment.
@Article{Pearce:2023:HLA,
  author =       "Hammond Pearce and Ramesh Karri and Benjamin Tan",
  title =        "High-Level Approaches to Hardware Security: a
                 Tutorial",
  journal =      j-TECS,
  volume =       "22",
  number =       "3",
  pages =        "45:1--45:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3577200",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 3 08:20:15 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3577200",
  abstract =     "Designers use third-party intellectual property (IP)
                 cores and outsource various steps in the integrated
                 circuit (IC) design and manufacturing flow. As a
                 result, security vulnerabilities have been rising. This
                 is forcing IC designers and end users to re-evaluate
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

%%% NOTE(review): the seventh author's given name is spelled
%%% ``Setareh'' here; confirm against the published article, since the
%%% publisher's metadata may print it differently.
@Article{Gubbi:2023:HTD,
  author =       "Kevin Immanuel Gubbi and Banafsheh Saber Latibari and
                 Anirudh Srikanth and Tyler Sheaves and Sayed Arash
                 Beheshti-Shirazi and Sai Manoj PD and Setareh Rafatirad
                 and Avesta Sasan and Houman Homayoun and Soheil
                 Salehi",
  title =        "Hardware {Trojan} Detection Using Machine Learning: a
                 Tutorial",
  journal =      j-TECS,
  volume =       "22",
  number =       "3",
  pages =        "46:1--46:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3579823",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 3 08:20:15 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3579823",
  abstract =     "With the growth and globalization of IC design and
                 development, there is an increase in the number of
                 Designers and Design houses. As setting up a
                 fabrication facility may easily cost upwards of \$20
                 billion, costs for advanced nodes may be even greater.
                 IC design houses that cannot produce their chips
                 in-house have no option but to use external foundries
                 that are often in other countries. Establishing trust
                 with these external foundries can be a challenge, and
                 these foundries are assumed to be untrusted. The use of
                 these untrusted foundries in the global semiconductor
                 supply chain has raised concerns about the security of
                 the fabricated ICs targeted for sensitive applications.
                 One of these security threats is the adversarial
                 infestation of fabricated ICs with a Hardware Trojan
                 (HT). An HT can be broadly described as a malicious
                 modification to a circuit to control, modify, disable,
                 or monitor its logic. Conventional VLSI manufacturing
                 tests and verification methods fail to detect HT due to
                 the different and unmodeled nature of these malicious
                 modifications. Current state-of-the-art HT detection
                 methods utilize statistical analysis of various
                 side-channel information collected from ICs, such as
                 power analysis, power supply transient analysis,
                 regional supply current analysis, temperature analysis,
                 wireless transmission power analysis, and delay
                 analysis. To detect HTs, most methods require a
                 Trojan-free reference golden IC. A signature from these
                 golden ICs is extracted and used to detect ICs with
                 HTs. However, access to a golden IC is not always
                 feasible. Thus, a mechanism for HT detection is sought
                 that does not require the golden IC. Machine Learning
                 (ML) approaches have emerged to be extremely useful in
                 helping eliminate the need for a golden IC. Recent
                 works on utilizing ML for HT detection have been shown
                 to be promising in achieving this goal. Thus, in this
                 tutorial, we will explain utilizing ML as a solution to
                 the challenge of HT detection. Additionally, we will
                 describe the Electronic Design Automation (EDA) tool
                 flow for automating ML-assisted HT detection. Moreover,
                 to further discuss the benefits of ML-assisted HT
                 detection solutions, we will demonstrate a Neural
                 Network (NN)-assisted timing profiling method for HT
                 detection. Finally, we will discuss the shortcomings
                 and open challenges of ML-assisted HT detection
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Liang:2023:TRS,
  author =       {Tailin Liang and Lei Wang and Shaobo Shi and John
                 Glossner and Xiaotong Zhang},
  title =        {{TCX}: a {RISC} Style Tensor Computing Extension and a
                 Programmable Tensor Processor},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {3},
  articleno =    {47},
  pages =        {47:1--47:??},
  month =        may,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3568310},
  URL =          {https://dl.acm.org/doi/10.1145/3568310},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {Neural network processors and accelerators are
                 domain-specific architectures deployed to solve the
                 high computational requirements of deep learning
                 algorithms. This article proposes a new instruction set
                 extension for tensor computing, TCX, using Reduced
                 \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

%%% The parenthetical aside in the abstract is set off with TeX
%%% em-dashes (---) so that it is not read as a run of hyphenated
%%% compounds.
@Article{Dong:2023:RAS,
  author =       "Yi Dong and Wei Huang and Vibhav Bharti and Victoria
                 Cox and Alec Banks and Sen Wang and Xingyu Zhao and
                 Sven Schewe and Xiaowei Huang",
  title =        "Reliability Assessment and Safety Arguments for
                 Machine Learning Components in System Assurance",
  journal =      j-TECS,
  volume =       "22",
  number =       "3",
  pages =        "48:1--48:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3570918",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Jul 3 08:20:15 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3570918",
  abstract =     "The increasing use of Machine Learning (ML) components
                 embedded in autonomous systems---so-called
                 Learning-Enabled Systems (LESs)---has resulted in the
                 pressing need to assure their functional safety. As for
                 traditional functional safety, the emerging \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yan:2023:MBD,
  author =       {Zujia Yan and Yi Zhuang and Weining Zheng and Jingjing
                 Gu},
  title =        {Multi-bit Data Flow Error Detection Method Based on
                 {SDC} Vulnerability Analysis},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  journal-URL =  {https://dl.acm.org/loi/tecs},
  volume =       {22},
  number =       {3},
  articleno =    {49},
  pages =        {49:1--49:??},
  month =        may,
  year =         {2023},
  DOI =          {https://doi.org/10.1145/3572838},
  URL =          {https://dl.acm.org/doi/10.1145/3572838},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  CODEN =        {????},
  abstract =     {One of the most difficult data flow errors to detect
                 caused by single-event upsets in space radiation is the
                 Silent Data Corruption (SDC). To solve the problem of
                 multi-bit upsets causing program SDC, an instruction
                 multi-bit SDC vulnerability prediction \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@article{Kohler:2023:RCE,
  author =       {Leonie K{\"o}hler and Phil Hertha and Matthias Beckert
                 and Alex Bendrick and Rolf Ernst},
  title =        {Robust Cause-Effect Chains with Bounded Execution Time
                 and System-Level Logical Execution Time},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {50:1--50:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3573388},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3573388},
  abstract =     {In automotive and industrial real-time software
                 systems, the primary timing constraints relate to
                 cause-effect chains. A cause-effect chain is a sequence
                 of linked tasks and it typically implements the process
                 of reading sensor data, computing algorithms,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {50},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Tuli:2023:CNA,
  author =       {Shikhar Tuli and Chia-Hao Li and Ritvik Sharma and
                 Niraj K. Jha},
  title =        {{CODEBench}: a Neural Architecture and Hardware
                 Accelerator Co-Design Framework},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {51:1--51:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3575798},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3575798},
  abstract =     {Recently, automated co-design of machine learning (ML)
                 models and accelerator architectures has attracted
                 significant attention from both the industry and
                 academia. However, most co-design frameworks either
                 explore a limited search space or employ \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {51},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Yi:2023:EEE,
  author =       {Saehanseul Yi and Tae-Wook Kim and Jong-Chan Kim and
                 Nikil Dutt},
  title =        {{EASYR}: Energy-Efficient Adaptive System
                 Reconfiguration for Dynamic Deadlines in Autonomous
                 Driving on Multicore Processors},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {52:1--52:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3570503},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3570503},
  abstract =     {The increasing computing demands of autonomous driving
                 applications have driven the adoption of multicore
                 processors in real-time systems, which in turn renders
                 energy optimizations critical for reducing battery
                 capacity and vehicle weight. A typical \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {52},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Zacharopoulos:2023:TEH,
  author =       {Georgios Zacharopoulos and Adel Ejjeh and Ying Jing
                 and En-Yu Yang and Tianyu Jia and Iulian Brumar and
                 Jeremy Intan and Muhammad Huzaifa and Sarita Adve and
                 Vikram Adve and Gu-Yeon Wei and David Brooks},
  title =        {{Trireme}: Exploration of Hierarchical Multi-level
                 Parallelism for Hardware Acceleration},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {53:1--53:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3580394},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3580394},
  abstract =     {The design of heterogeneous systems that include
                 domain specific accelerators is a challenging and
                 time-consuming process. While taking into account area
                 constraints, designers must decide which parts of an
                 application to accelerate in hardware and which
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {53},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Lindemann:2023:RSS,
  author =       {Lars Lindemann and Lejun Jiang and Nikolai Matni and
                 George J. Pappas},
  title =        {Risk of Stochastic Systems for Temporal Logic
                 Specifications},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {54:1--54:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3580490},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3580490},
  abstract =     {The wide availability of data coupled with the
                 computational advances in artificial intelligence and
                 machine learning promise to enable many future
                 technologies such as autonomous driving. While there
                 has been a variety of successful demonstrations of
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {54},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Yin:2023:CBR,
  author =       {Jun Yin and Marian Verhelst},
  title =        {{CNN}-based Robust Sound Source Localization with
                 {SRP-PHAT} for the Extreme Edge},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {55:1--55:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3586996},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3586996},
  abstract =     {Robust sound source localization for environments with
                 noise and reverberation are increasingly exploiting
                 deep neural networks fed with various acoustic
                 features. Yet, state-of-the-art research mainly focuses
                 on optimizing algorithmic accuracy, resulting
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {55},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Tabanelli:2023:DAY,
  author =       {Enrico Tabanelli and Giuseppe Tagliavini and Luca
                 Benini},
  title =        {{DNN} Is Not All You Need: Parallelizing Non-neural
                 {ML} Algorithms on Ultra-low-power {IoT} Processors},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {56:1--56:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3571133},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3571133},
  abstract =     {Machine Learning (ML) functions are becoming
                 ubiquitous in latency- and privacy-sensitive IoT
                 applications, prompting a shift toward near-sensor
                 processing at the extreme edge and the consequent
                 increasing adoption of Parallel Ultra-low-power (PULP)
                 IoT \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {56},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Wu:2023:EAD,
  author =       {Yirui Wu and Lilai Zhang and Zonghua Gu and Hu Lu and
                 Shaohua Wan},
  title =        {Edge-{AI}-Driven Framework with Efficient Mobile
                 Network Design for Facial Expression Recognition},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {57:1--57:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3587038},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3587038},
  abstract =     {Facial Expression Recognition (FER) in the wild poses
                 significant challenges due to realistic occlusions,
                 illumination, scale, and head pose variations of the
                 facial images. In this article, we propose an
                 Edge-AI-driven framework for FER. On the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {57},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Isik:2023:NNC,
  author =       {Berivan Isik and Kristy Choi and Xin Zheng and Tsachy
                 Weissman and Stefano Ermon and H.-S. Philip Wong and
                 Armin Alaghi},
  title =        {Neural Network Compression for Noisy Storage Devices},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {58:1--58:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3588436},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3588436},
  abstract =     {Compression and efficient storage of neural network
                 (NN) parameters is critical for applications that run
                 on resource-constrained devices. Despite the
                 significant progress in NN model compression, there has
                 been considerably less investigation in the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {58},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Kloda:2023:LLS,
  author =       {Tomasz Kloda and Giovani Gracioli and Rohan Tabish and
                 Reza Mirosanlou and Renato Mancuso and Rodolfo
                 Pellizzoni and Marco Caccamo},
  title =        {Lazy Load Scheduling for Mixed-criticality
                 Applications in Heterogeneous {MPSoCs}},
  journal =      j-TECS,
  volume =       {22},
  number =       {3},
  pages =        {59:1--59:??},
  month =        may,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3587694},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Mon Jul 3 08:20:15 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3587694},
  abstract =     {Newly emerging multiprocessor system-on-a-chip (MPSoC)
                 platforms provide hard processing cores with
                 programmable logic (PL) for high-performance computing
                 applications. In this article, we take a deep look into
                 these commercially available heterogeneous \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {59},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Parra:2023:TMV,
  author =       {Pablo Parra and Antonio {Da Silva} and Borja Losa and
                 J. Ignacio Garc{\'\i}a and {\'O}scar R. Polo and
                 Agust{\'\i}n Mart{\'\i}nez and Sebasti{\'a}n
                 S{\'a}nchez},
  title =        {Tailor-made Virtualization Monitor Design for {CPU}
                 Virtualization on {LEON} Processors},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {60:1--60:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3584702},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3584702},
  abstract =     {In recent decades, mixed-criticality systems have been
                 widely adopted to reduce the complexity and development
                 times of real-time critical applications. In these
                 systems, applications run on a separation kernel
                 hypervisor, a software element that controls \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {60},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Papaioannou:2023:ULP,
  author =       {Alexios Papaioannou and Charalampos S. Kouzinopoulos
                 and Dimosthenis Ioannidis and Dimitrios Tzovaras},
  title =        {An Ultra-low-power Embedded {AI} Fire Detection and
                 Crowd Counting System for Indoor Areas},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {61:1--61:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3582433},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3582433},
  abstract =     {Fire incidents in residential and industrial areas are
                 often the cause of human casualties and property
                 damage. Although there are existing systems that detect
                 fire and monitor the presence of people in indoor
                 areas, research on their implementation in \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {61},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Bhattacharjee:2023:XEA,
  author =       {Abhiroop Bhattacharjee and Abhishek Moitra and
                 Priyadarshini Panda},
  title =        {{XploreNAS}: Explore Adversarially Robust and
                 Hardware-efficient Neural Architectures for Non-ideal
                 Xbars},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {62:1--62:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3593045},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3593045},
  abstract =     {Compute In-Memory platforms such as memristive
                 crossbars are gaining focus as they facilitate
                 acceleration of Deep Neural Networks (DNNs) with high
                 area and compute efficiencies. However, the intrinsic
                 non-idealities associated with the analog nature of
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {62},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Gunzel:2023:CTA,
  author =       {Mario G{\"u}nzel and Kuan-Hsun Chen and Niklas Ueter
                 and Georg von der Br{\"u}ggen and Marco D{\"u}rr and
                 Jian-Jia Chen},
  title =        {Compositional Timing Analysis of Asynchronized
                 Distributed Cause-effect Chains},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {63:1--63:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3587036},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3587036},
  abstract =     {Real-time systems require the formal guarantee of
                 timing constraints, not only for the individual tasks
                 but also for the end-to-end latency of data flows. The
                 data flow among multiple tasks, e.g., from sensors to
                 actuators, is described by a cause-effect \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {63},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Shadab:2023:HHC,
  author =       {Rakin Muhammad Shadab and Yu Zou and Sanjay Gandham
                 and Amro Awad and Mingjie Lin},
  title =        {{HMT}: a Hardware-centric Hybrid Bonsai {Merkle} Tree
                 Algorithm for High-performance Authentication},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {64:1--64:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3595179},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3595179},
  abstract =     {The Bonsai Merkle tree (BMT) is a widely used tree
                 structure for authentication of metadata such as
                 encryption counters in a secure computing system.
                 Common BMT algorithms were designed for traditional Von
                 Neumann architectures with a software-centric
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {64},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Min:2023:MTK,
  author =       {Donghyun Min and Kihyun Kim and Chaewon Moon and Awais
                 Khan and Seungjin Lee and Changhwan Yun and Woosuk
                 Chung and Youngjae Kim},
  title =        {A Multi-tenant Key-value {SSD} with Secondary Index
                 for Search Query Processing and Analysis},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {65:1--65:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3590153},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3590153},
  abstract =     {Key-value SSDs (KVSSDs) introduced so far are limited
                 in their use as an alternative to the key-value store
                 running on the host due to the following technical
                 limitations. First, they were designed only for a
                 single tenant, limiting the use of multiple \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {65},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Zhang:2023:OCS,
  author =       {Lin Zhang and Zifan Wang and Fanxin Kong},
  title =        {Optimal Checkpointing Strategy for Real-time Systems
                 with Both Logical and Timing Correctness},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {66:1--66:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3603172},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3603172},
  abstract =     {Real-time systems are susceptible to adversarial
                 factors such as faults and attacks, leading to severe
                 consequences. This paper presents an optimal checkpoint
                 scheme to bolster fault resilience in real-time
                 systems, addressing both logical consistency and
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {66},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@Article{West:2023:RTU,
  author =       "Richard West and Ahmad Golchin and Anton Njavro",
  title =        "Real-Time {USB} Networking and Device {I/O}",
  journal =      j-TECS,
  volume =       "22",
  number =       "4",
  pages =        "67:1--67:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604429",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 10 07:21:24 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604429",
  abstract =     "Multicore PC-class embedded systems present an
                 opportunity to consolidate separate microcontrollers as
                 software-defined functions. For instance, an automotive
                 system with more than 100 electronic control units
                 (ECUs) could be replaced with one or, at most,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "67",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Kohl:2023:MBD,
  author =       {Maximilian A. K{\"o}hl and Holger Hermanns},
  title =        {Model-Based Diagnosis of Real-Time Systems: Robustness
                 Against Varying Latency, Clock Drift, and Out-of-Order
                 Observations},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {68:1--68:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3597209},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3597209},
  abstract =     {Online fault diagnosis techniques are a key enabler of
                 effective failure mitigation. For real-time systems,
                 the problem of identifying faults is aggravated by
                 timing imprecisions such as varying latency between
                 events and their observation. This paper \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {68},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Terway:2023:RGM,
  author =       {Prerit Terway and Niraj K. Jha},
  title =        {{REPAIRS}: {Gaussian} Mixture Model-based Completion
                 and Optimization of Partially Specified Systems},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {69:1--69:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3605147},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3605147},
  abstract =     {Most system optimization techniques focus on finding
                 the values of the system components to achieve the best
                 performance. Searching over all component values gives
                 the search methodology the freedom to explore the
                 entire design space to determine the best \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {69},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Hsu:2023:GBC,
  author =       {Yao-Jen Hsu and Chin-Hsien Wu and Yu-Chieh Tsai and
                 Chia-Cheng Liu},
  title =        {A Granularity-Based Clustering Method for Reducing
                 Write Amplification in Solid-State Drives},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {70:1--70:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3605779},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3605779},
  abstract =     {In recent years, solid-state drives (SSDs) that adopt
                 NAND flash memory have been widely used as the main
                 storage devices. In particular, NAND flash memory has a
                 special feature of ``out-of-place'' updates to write
                 the up-to-date data to a free page, and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {70},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Niknafs:2023:RRM,
  author =       {Mina Niknafs and Petru Eles and Zebo Peng},
  title =        {Runtime Resource Management with Multiple-Step-Ahead
                 Workload Prediction},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {71:1--71:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3605213},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3605213},
  abstract =     {Modern embedded platforms need sophisticated resource
                 managers to utilize their heterogeneous computational
                 resources efficiently. Furthermore, such platforms are
                 subject to fluctuating workloads that are unforeseeable
                 at design time. Predicting the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {71},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@article{Bosio:2023:SIA,
  author =       {Alberto Bosio and Lara Dolecek and Alexandra Kourfali
                 and Sri Parameswaran and Alessandro Savino},
  title =        {Special Issue: {``Approximation at the Edge''}},
  journal =      j-TECS,
  volume =       {22},
  number =       {4},
  pages =        {72:1--72:??},
  month =        jul,
  year =         {2023},
  coden =        {????},
  doi =          {https://doi.org/10.1145/3605757},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  bibdate =      {Thu Aug 10 07:21:24 MDT 2023},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url =          {https://dl.acm.org/doi/10.1145/3605757},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Embed. Comput. Syst.},
  articleno =    {72},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {https://dl.acm.org/loi/tecs},
}

@Article{Pradhan:2023:ETB,
  author =       "Chetana Pradhan and Martin Letras and J{\"u}rgen
                 Teich",
  title =        "Efficient Table-based Function Approximation on
                 {FPGAs} Using Interval Splitting and {BRAM}
                 Instantiation",
  journal =      j-TECS,
  volume =       "22",
  number =       "4",
  pages =        "73:1--73:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3580737",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 10 07:21:24 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/elefunt.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3580737",
  abstract =     "This article proposes a novel approach for the
                 generation of memory-efficient table-based function
                 approximation circuits for edge devices in general and
                 FPGAs in particular. Given a function $ f(x) $ to be
                 approximated in a given interval $ [x_0, x_0 + a) $ and
                 a maximum approximation error $ E_a $, the goal is to
                 determine a function table implementation with a
                 minimized memory footprint, i.e., number of entries
                 that need to be stored. Rather than state-of-the-art
                 work performing an equidistant sampling of the given
                 interval by so-called breakpoints and using linear
                 interpolation between two adjacent breakpoints to
                 determine $ f(x) $ at the maximum error bound, we
                 propose and compare three algorithms for splitting the
                 given interval into sub-intervals to reduce the
                 required memory footprint drastically based on the
                 observation that in sub-intervals of low gradient, a
                 coarser sampling grid may be assumed while guaranteeing
                 the maximum interpolation error bound $ E_a $.
                 Experiments on elementary mathematical functions show
                 that a large fraction in memory footprint may be saved.
                 Second, a hardware architecture implementing the
                 sub-interval selection, breakpoint lookup, and
                 interpolation at a latency of just 9 clock cycles is
                 introduced. Third, for each generated circuit design,
                 BRAMs are automatically instantiated rather than
                 synthesizing the reduced footprint function table using
                 LUT primitives, providing an additional degree of
                 resource efficiency. The approach presented here for
                 FPGAs can equally be applied to other circuit
                 technologies for fast and, at the same time,
                 memory-optimized function approximation at the edge.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "73",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Paul:2023:ANI,
  author =       "Sibendu Paul and Utsav Drolia and Y. Charlie Hu and
                 Srimat Chakradhar",
  title =        "{AQuA}: a New Image Quality Metric for Optimizing
                 Video Analytics Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "4",
  pages =        "74:1--74:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3568423",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 10 07:21:24 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3568423",
  abstract =     "Millions of cameras at the edge are being deployed to
                 power a variety of different deep learning
                 applications. However, the frames captured by these
                 cameras are not always pristine---they can be distorted
                 due to lighting issues, sensor noise, compression
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "74",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Leon-Vega:2023:AGR,
  author =       "Luis G. Le{\'o}n-Vega and Eduardo Salazar-Villalobos
                 and Alejandro Rodriguez-Figueroa and Jorge
                 Castro-God{\'\i}nez",
  title =        "Automatic Generation of Resource and Accuracy
                 Configurable Processing Elements",
  journal =      j-TECS,
  volume =       "22",
  number =       "4",
  pages =        "75:1--75:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3594540",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 10 07:21:24 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3594540",
  abstract =     "Low-power consumption and scarce computational
                 resources limit the computation at the edge. Besides,
                 the approximate computing paradigm reports promising
                 techniques for designing accelerators to deal with
                 inherent limitations of the edge, and high-level
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "75",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Awais:2023:TOS,
  author =       "Muhammad Awais and Ali Zahir and Syed Ayaz Ali Shah
                 and Pedro Reviriego and Anees Ullah and Nasim Ullah and
                 Adam Khan and Hazrat Ali",
  title =        "Toward Optimal Softcore Carry-aware Approximate
                 Multipliers on {Xilinx} {FPGAs}",
  journal =      j-TECS,
  volume =       "22",
  number =       "4",
  pages =        "76:1--76:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3564243",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 10 07:21:24 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3564243",
  abstract =     "Domain-specific accelerators for signal processing,
                 image processing, and machine learning are increasingly
                 being implemented on SRAM-based field-programmable gate
                 arrays (FPGAs). Owing to the inherent error tolerance
                 of such applications, approximate arithmetic
                 operations, in particular, the design of approximate
                 multipliers, have become an important research
                 problem. Truncation of lower bits is a widely used
                 approximation approach; however, analyzing and limiting
                 the effects of carry-propagation due to this
                 approximation has not been explored in detail yet. In
                 this article, an optimized carry-aware approximate
                 radix-4 Booth multiplier design is presented that
                 leverages the built-in slice look-up tables (LUTs) and
                 carry-chain resources in a novel configuration. The
                 proposed multiplier simplifies the computation of the
                 upper and lower bits and provides significant benefits
                 in terms of FPGA resource usage (LUTs saving
                 38.5\%--42.9\%), Power Delay Product (PDP saving
                 49.4\%--53\%), performance metric (LUTs $ \times $
                 critical path delay (CPD) $ \times $ PDP saving
                 68.9\%--73.1\%) and errors (70\% improvement in mean
                 relative error distance) compared to the latest
                 state-of-the-art designs. Therefore, the proposed
                 designs are an attractive choice to implement
                 multiplication on FPGA-based accelerators.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "76",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ghosh:2023:EEA,
  author =       "Soumendu Kumar Ghosh and Arnab Raha and Vijay
                 Raghunathan",
  title =        "Energy-Efficient Approximate Edge Inference Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "4",
  pages =        "77:1--77:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3589766",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 10 07:21:24 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3589766",
  abstract =     "The rapid proliferation of the Internet of Things and
                 the dramatic resurgence of artificial intelligence
                 based application workloads have led to immense
                 interest in performing inference on energy-constrained
                 edge devices. Approximate computing (a design
                 \ldots{})",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "77",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tsounis:2023:MFT,
  author =       "Ioannis Tsounis and Dimitris Agiakatsikas and Mihalis
                 Psarakis",
  title =        "A Methodology for Fault-tolerant {Pareto}-optimal
                 Approximate Designs of {FPGA}-based Accelerators",
  journal =      j-TECS,
  volume =       "22",
  number =       "4",
  pages =        "78:1--78:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3568021",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 10 07:21:24 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3568021",
  abstract =     "Approximate Computing Techniques (ACTs) take advantage
                 of resilience computing applications to trade off among
                 output precision, area, power, and performance. ACTs
                 can lead to significant gains at affordable costs when
                 efficiently implemented on Field \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Pan:2023:BBS,
  author =       "Yunjie Pan and Jiecao Yu and Andrew Lukefahr and
                 Reetuparna Das and Scott Mahlke",
  title =        "{BitSET}: Bit-Serial Early Termination for Computation
                 Reduction in Convolutional Neural Networks",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "98:1--98:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609093",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609093",
  abstract =     "Convolutional Neural Networks (CNNs) have demonstrated
                 remarkable performance across a wide range of machine
                 learning tasks. However, the high accuracy \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "98",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yang:2023:EEP,
  author =       "Zhao Yang and Qingshuang Sun",
  title =        "Energy-efficient Personalized Federated Search with
                 Graph for Edge Computing",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "99:1--99:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609435",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609435",
  abstract =     "Federated Learning (FL) is a popular method for
                 privacy-preserving machine learning on edge devices.
                 However, the heterogeneity of edge devices, including
                 differences \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "99",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wang:2023:EEM,
  author =       "Yitu Wang and Shiyu Li and Qilin Zheng and Andrew
                 Chang and Hai Li and Yiran Chen",
  title =        "{EMS-i}: an Efficient Memory System Design with
                 Specialized Caching Mechanism for Recommendation
                 Inference",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "100:1--100:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609384",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609384",
  abstract =     "Recommendation systems have been widely embedded into
                 many Internet services. For example, Meta's deep
                 learning recommendation model (DLRM) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "100",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sahoo:2023:ATS,
  author =       "Siva Satyendra Sahoo and Salim Ullah and Akash Kumar",
  title =        "{AxOTreeS}: a Tree Search Approach to Synthesizing
                 {FPGA}-based Approximate Operators",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "101:1--101:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609096",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609096",
  abstract =     "Approximate computing (AxC) provides the scope for
                 achieving disproportionate gains in a system's power,
                 performance, and area (PPA) metrics by leveraging an
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "101",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Afifi:2023:GGN,
  author =       "Salma Afifi and Febin Sunny and Amin Shafiee and Mahdi
                 Nikdast and Sudeep Pasricha",
  title =        "{GHOST}: a Graph Neural Network Accelerator using
                 Silicon Photonics",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "102:1--102:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609097",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609097",
  abstract =     "Graph neural networks (GNNs) have emerged as a
                 powerful approach for modelling and learning from
                 graph-structured data. Multiple fields have since
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "102",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ren:2023:PWB,
  author =       "Jiankang Ren and Chunxiao Liu and Chi Lin and Ran Bi
                 and Simeng Li and Zheng Wang and Yicheng Qian and
                 Zhichao Zhao and Guozhen Tan",
  title =        "Protection Window Based Security-Aware Scheduling
                 against Schedule-Based Attacks",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "103:1--103:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609098",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609098",
  abstract =     "With widespread use of common-off-the-shelf components
                 and the drive towards connection with external
                 environments, the real-time systems are facing more
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "103",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sha:2023:PSR,
  author =       "Zhibing Sha and Jiaojiao Wu and Jun Li and Balazs
                 Gerofi and Zhigang Cai and Jianwei Liao",
  title =        "Proactive Stripe Reconstruction to Improve Cache Use
                 Efficiency of {SSD}-Based {RAID} Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "104:1--104:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609099",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609099",
  abstract =     "Solid-State Drives (SSDs) exhibit different failure
                 characteristics compared to conventional hard disk
                 drives. In particular, the Bit Error Rate (BER) of an
                 SSD \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "104",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mousavi:2023:DDA,
  author =       "Hamid Mousavi and Mohammad Loni and Mina Alibeigi and
                 Masoud Daneshtalab",
  title =        "{DASS}: Differentiable Architecture Search for Sparse
                 Neural Networks",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "105:1--105:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609385",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609385",
  abstract =     "The deployment of Deep Neural Networks (DNNs) on edge
                 devices is hindered by the substantial gap between
                 performance requirements and available \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "105",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Clair:2023:SED,
  author =       "Judicael Clair and Guy Eichler and Luca P. Carloni",
  title =        "{SpikeHard}: Efficiency-Driven Neuromorphic Hardware
                 for Heterogeneous Systems-on-Chip",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "106:1--106:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609101",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609101",
  abstract =     "Neuromorphic computing is an emerging field with the
                 potential to offer performance and energy-efficiency
                 gains over traditional machine learning approaches.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "106",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Klashtorny:2023:PGW,
  author =       "Artem Klashtorny and Zhuanhao Wu and Anirudh Mohan
                 Kaushik and Hiren Patel",
  title =        "Predictable {GPU} Wavefront Splitting for
                 Safety-Critical Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "107:1--107:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609102",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609102",
  abstract =     "We present a predictable wavefront splitting (PWS)
                 technique for graphics processing units (GPUs). PWS
                 improves the performance of GPU applications by
                 reducing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "107",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Odema:2023:MMA,
  author =       "Mohanad Odema and Halima Bouzidi and Hamza Ouarnoughi
                 and Smail Niar and Mohammad Abdullah {Al Faruque}",
  title =        "{MaGNAS}: a Mapping-Aware Graph Neural Architecture
                 Search Framework for Heterogeneous {MPSoC} Deployment",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "108:1--108:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609386",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609386",
  abstract =     "Graph Neural Networks (GNNs) are becoming increasingly
                 popular for vision-based applications due to their
                 intrinsic capacity in modeling structural and
                 contextual \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "108",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mondal:2023:PPR,
  author =       "Anupam Mondal and Shreya Gangopadhyay and Durba
                 Chatterjee and Harishma Boyapally and Debdeep
                 Mukhopadhyay",
  title =        "{PReFeR}: Physically Related Function based Remote
                 Attestation Protocol",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "109:1--109:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609104",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609104",
  abstract =     "Remote attestation is a request-response based
                 security service that permits a trusted entity
                 (verifier) to check the current state of an untrusted
                 remote \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "109",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ikeda:2023:MDD,
  author =       "Sosei Ikeda and Hiromitsu Awano and Takashi Sato",
  title =        "Modular {DFR}: Digital Delayed Feedback Reservoir
                 Model for Enhancing Design Flexibility",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "110:1--110:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609105",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609105",
  abstract =     "A delayed feedback reservoir (DFR) is a type of
                 reservoir computing system well-suited for hardware
                 implementations owing to its simple structure. Most
                 existing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "110",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Mishra:2023:VVA,
  author =       "Vishesh Mishra and Sparsh Mittal and Neelofar Hassan
                 and Rekha Singhal and Urbi Chatterjee",
  title =        "{VADF}: Versatile Approximate Data Formats for
                 Energy-Efficient Computing",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "111:1--111:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609106",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609106",
  abstract =     "Approximate computing (AC) techniques provide overall
                 performance gains in terms of power and energy savings
                 at the cost of minor loss in application accuracy.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "111",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Halder:2023:OPN,
  author =       "Dipal Halder and Maneesh Merugu and Sandip Ray",
  title =        "{ObNoCs}: Protecting Network-on-Chip Fabrics Against
                 Reverse-Engineering Attacks",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "112:1--112:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609107",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609107",
  abstract =     "Modern System-on-Chip designs typically use
                 Network-on-Chip (NoC) fabrics to implement coordination
                 among integrated hardware blocks. An important
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "112",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Basaklar:2023:DDT,
  author =       "Toygun Basaklar and A. Alper Goksoy and Anish
                 Krishnakumar and Suat Gumussoy and Umit Y. Ogras",
  title =        "{DTRL}: Decision Tree-based Multi-Objective
                 Reinforcement Learning for Runtime Task Scheduling in
                 Domain-Specific System-on-Chips",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "113:1--113:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609108",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609108",
  abstract =     "Domain-specific systems-on-chip (DSSoCs) combine
                 general-purpose processors and specialized hardware
                 accelerators to improve performance and energy
                 efficiency \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "113",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Juang:2023:LCG,
  author =       "Tzung-Han Juang and Christof Schlaak and Christophe
                 Dubach",
  title =        "Let Coarse-Grained Resources Be Shared: Mapping Entire
                 Neural Networks on {FPGAs}",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "114:1--114:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609109",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609109",
  abstract =     "Traditional High-Level Synthesis (HLS) provides rapid
                 prototyping of hardware accelerators without coding
                 with Hardware Description Languages (HDLs). \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "114",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bakshi:2023:CED,
  author =       "Suyash Bakshi and Lennart Johnsson",
  title =        "Computationally Efficient {DNN} Mapping Search
                 Heuristic using Deep Reinforcement Learning",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "115:1--115:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609110",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609110",
  abstract =     "In this work, we present a computationally efficient
                 Reinforcement Learning mapping search heuristic for
                 finding high quality mappings for N-dimensional
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "115",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Hussein:2023:CNC,
  author =       "Dina Hussein and Ganapati Bhat",
  title =        "{CIM}: a Novel Clustering-based Energy-Efficient Data
                 Imputation Method for Human Activity Recognition",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "116:1--116:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609111",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609111",
  abstract =     "Human activity recognition (HAR) is an important
                 component in a number of health applications, including
                 rehabilitation, Parkinson's disease, daily activity
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "116",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ravi:2023:VLV,
  author =       "Akshara Ravi and Vivek Chaturvedi and Muhammad
                 Shafique",
  title =        "{ViT4Mal}: Lightweight Vision Transformer for Malware
                 Detection on Edge Devices",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "117:1--117:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609112",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609112",
  abstract =     "There has been a tremendous growth of edge devices
                 connected to the network in recent years. Although
                 these devices make our life simpler and smarter, they
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "117",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Deb:2023:ZDT,
  author =       "Dipika Deb and John Jose",
  title =        "{ZPP}: a Dynamic Technique to Eliminate Cache
                 Pollution in {NoC} based {MPSoCs}",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "118:1--118:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609113",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609113",
  abstract =     "Data prefetching efficiently reduces the memory access
                 latency in NUCA architectures as the Last Level Cache
                 (LLC) is shared and distributed across \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "118",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Wu:2023:WTM,
  author =       "Shin-Ting Wu and Liang-Chi Chen and Po-Chun Huang and
                 Yuan-Hao Chang and Chien-Chung Ho and Wei-Kuan Shih",
  title =        "{WARM}-tree: Making Quadtrees Write-efficient and
                 Space-economic on Persistent Memories",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "119:1--119:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608033",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608033",
  abstract =     "Recently, the value of data has been widely
                 recognized, which highlights the significance of
                 data-centric computing in diversified application
                 scenarios. In many \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "119",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Shen:2023:TMS,
  author =       "Yixian Shen and Leo Schreuders and Anuj Pathania and
                 Andy D. Pimentel",
  title =        "Thermal Management for {$3$D}-Stacked Systems via
                 Unified Core-Memory Power Regulation",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "120:1--120:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608040",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608040",
  abstract =     "3D-stacked processor-memory systems stack memory (DRAM
                 banks) directly on top of logic (CPU cores) using
                 chiplet-on-chiplet packaging technology to provide
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "120",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ponzina:2023:OFC,
  author =       "Flavio Ponzina and Marco Rios and Alexandre Levisse
                 and Giovanni Ansaloni and David Atienza",
  title =        "Overflow-free Compute Memories for Edge {AI}
                 Acceleration",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "121:1--121:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609387",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609387",
  abstract =     "Compute memories are memory arrays augmented with
                 dedicated logic to support arithmetic. They support the
                 efficient execution of data-centric \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "121",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Vali:2023:BSD,
  author =       "Kourosh Vali and Ata Vafi and Begum Kasap and Soheil
                 Ghiasi",
  title =        "{BASS}: Safe Deep Tissue Optical Sensing for Wearable
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "122:1--122:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607916",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607916",
  abstract =     "In wearable optical sensing applications whose target
                 tissue is not superficial, such as deep tissue
                 oximetry, the task of embedded system design has to
                 strike a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "122",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Huai:2023:CCR,
  author =       "Shuo Huai and Hao Kong and Xiangzhong Luo and Shiqing
                 Li and Ravi Subramaniam and Christian Makaya and Qian
                 Lin and Weichen Liu",
  title =        "{CRIMP}: Compact \& Reliable {DNN} Inference on
                 In-Memory Processing via Crossbar-Aligned Compression
                 and Non-ideality Adaptation",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "123:1--123:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609115",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609115",
  abstract =     "Crossbar-based In-Memory Processing (IMP) accelerators
                 have been widely adopted to achieve high-speed and
                 low-power computing, especially for deep \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "123",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Yen:2023:KBR,
  author =       "Chih-Hsuan Yen and Hashan Roshantha Mendis and Tei-Wei
                 Kuo and Pi-Cheng Hsiu",
  title =        "Keep in Balance: Runtime-reconfigurable Intermittent
                 Deep Inference",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "124:1--124:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607918",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607918",
  abstract =     "Intermittent deep neural network (DNN) inference is a
                 promising technique to enable intelligent applications
                 on tiny devices powered by ambient energy \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "124",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Gufran:2023:FHR,
  author =       "Danish Gufran and Sudeep Pasricha",
  title =        "{FedHIL}: Heterogeneity Resilient Federated Learning
                 for Robust Indoor Localization with Mobile Devices",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "125:1--125:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607919",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607919",
  abstract =     "Indoor localization plays a vital role in applications
                 such as emergency response, warehouse management, and
                 augmented reality experiences. By deploying \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "125",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Xia:2023:SPS,
  author =       "Chengpeng Xia and Yawen Chen and Haibo Zhang and
                 Jigang Wu",
  title =        "{STADIA}: Photonic Stochastic Gradient Descent for
                 Neural Network Accelerators",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "126:1--126:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607920",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607920",
  abstract =     "Deep Neural Networks (DNNs) have demonstrated great
                 success in many fields such as image recognition and
                 text analysis. However, the ever-increasing sizes
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "126",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chang:2023:LEL,
  author =       "Jung-Hsiu Chang and Tzu-Yu Chang and Yi-Chao Shih and
                 Tseng-Yi Chen",
  title =        "{LaDy}: Enabling Locality-aware Deduplication
                 Technology on Shingled Magnetic Recording Drives",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "127:1--127:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607921",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607921",
  abstract =     "The continuous increase in data volume has led to the
                 adoption of shingled-magnetic recording (SMR) as the
                 primary technology for modern storage drives. This
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "127",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lien:2023:FFS,
  author =       "Yi-Han Lien and Yen-Ting Chen and Yuan-Hao Chang and
                 Yu-Pei Liang and Wei-Kuan Shih",
  title =        "{FSIMR}: File-system-aware Data Management for
                 Interlaced Magnetic Recording",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "128:1--128:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607922",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607922",
  abstract =     "Interlaced Magnetic Recording (IMR) is an emerging
                 recording technology for hard-disk drives (HDDs) that
                 provides larger storage capacity at a lower cost. By
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "128",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Li:2023:IIE,
  author =       "Wentong Li and Liang Shi and Hang Li and Changlong Li
                 and Edwin Hsing-Mean Sha",
  title =        "{IOSR}: Improving {I/O} Efficiency for Memory Swapping
                 on Mobile Devices Via Scheduling and Reshaping",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "129:1--129:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607923",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607923",
  abstract =     "Mobile systems and applications are becoming
                 increasingly feature-rich and powerful, which
                 constantly suffer from memory pressure, especially for
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "129",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Modi:2023:CRR,
  author =       "Garima Modi and Aritra Bagchi and Neetu Jindal and
                 Ayan Mandal and Preeti Ranjan Panda",
  title =        "{CABARRE}: Request Response Arbitration for Shared
                 Cache Management",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "130:1--130:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608096",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608096",
  abstract =     "Modern multi-processor systems-on-chip (MPSoCs) are
                 characterized by caches shared by multiple cores. These
                 shared caches receive requests issued by the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "130",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Ahmed:2023:SAH,
  author =       "Soyed Tuhin Ahmed and Kamal Danouchi and Michael
                 Hefenbrock and Guillaume Prenat and Lorena Anghel and
                 Mehdi B. Tahoori",
  title =        "{SpinBayes}: Algorithm-Hardware Co-Design for
                 Uncertainty Estimation Using {Bayesian} In-Memory
                 Approximation on Spintronic-Based Architectures",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "131:1--131:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609116",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609116",
  abstract =     "Recent development in neural networks (NNs) has led to
                 their widespread use in critical and automated
                 decision-making systems, where uncertainty \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "131",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sharma:2023:FCD,
  author =       "Harsh Sharma and Lukas Pfromm and Rasit Onur Topaloglu
                 and Janardhan Rao Doppa and Umit Y. Ogras and Ananth
                 Kalyanaraman and Partha Pratim Pande",
  title =        "Florets for Chiplets: Data Flow-aware High-Performance
                 and Energy-efficient Network-on-Interposer for {CNN}
                 Inference Tasks",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "132:1--132:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608098",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608098",
  abstract =     "Recent advances in 2.5D chiplet platforms provide a
                 new avenue for compact scale-out implementations of
                 emerging compute- and data-intensive applications
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "132",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Nassar:2023:APM,
  author =       "Hassan Nassar and Lars Bauer and J{\"o}rg Henkel",
  title =        "{ANV-PUF}: Machine-Learning-Resilient {NVM}-Based
                 Arbiter {PUF}",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "133:1--133:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609388",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609388",
  abstract =     "Physical Unclonable Functions (PUFs) have been widely
                 considered an attractive security primitive. They use
                 the deviations in the fabrication process to have
                 unique \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "133",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Sorrentino:2023:HCA,
  author =       "Giuseppe Sorrentino and Marco Venere and Davide
                 Conficconi and Eleonora D'Arnese and Marco Domenico
                 Santambrogio",
  title =        "{Hephaestus}: Codesigning and Automating {$3$D} Image
                 Registration on Reconfigurable Architectures",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "134:1--134:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607928",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607928",
  abstract =     "Healthcare is a pivotal research field, and medical
                 imaging is crucial in many applications. Therefore
                 finding new architectural and algorithmic \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "134",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Tuncel:2023:SSC,
  author =       "Yigit Tuncel and Toygun Basaklar and Dina
                 Carpenter-Graffy and Umit Ogras",
  title =        "A Self-Sustained {CPS} Design for Reliable Wildfire
                 Monitoring",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "135:1--135:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608100",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608100",
  abstract =     "Continuous monitoring of areas nearby the electric
                 grid is critical for preventing and early detection of
                 devastating wildfires. Existing wildfire monitoring
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "135",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lohar:2023:SMF,
  author =       "Debasmita Lohar and Clothilde Jeangoudoux and
                 Anastasia Volkova and Eva Darulova",
  title =        "Sound Mixed Fixed-Point Quantization of Neural
                 Networks",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "136:1--136:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609118",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609118",
  abstract =     "Neural networks are increasingly being used as
                 components in safety-critical applications, for
                 instance, as controllers in embedded systems. Their
                 formal \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "136",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Bourke:2023:VCS,
  author =       "Timothy Bourke and Basile Pesin and Marc Pouzet",
  title =        "Verified Compilation of Synchronous Dataflow with
                 State Machines",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "137:1--137:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608102",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608102",
  abstract =     "Safety-critical embedded software is routinely
                 programmed in block-diagram languages. Recent work in
                 the V{\'e}lus project specifies such a language and its
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "137",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Lee:2023:CVA,
  author =       "Edward A. Lee and Ravi Akella and Soroush Bateni and
                 Shaokai Lin and Marten Lohstroh and Christian Menard",
  title =        "Consistency vs. Availability in Distributed
                 Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "138:1--138:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609119",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609119",
  abstract =     "In distributed applications, Brewer's CAP theorem
                 tells us that when networks become partitioned (P), one
                 must give up either consistency (C) or availability
                 (A). \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "138",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Peeck:2023:IWC,
  author =       "Jonas Peeck and Rolf Ernst",
  title =        "Improving Worst-case {TSN} Communication Times of
                 Large Sensor Data Samples by Exploiting
                 Synchronization",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "139:1--139:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609120",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609120",
  abstract =     "Higher levels of automated driving also require a more
                 sophisticated environmental perception. Therefore, an
                 increasing number of sensors transmit their data
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "139",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Chou:2023:RSK,
  author =       "Yi-Quan Chou and Lin-Wei Shen and Li-Pin Chang",
  title =        "Rectifying Skewed Kernel Page Reclamation in Mobile
                 Devices for Improving User-Perceivable Latency",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "140:1--140:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607937",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607937",
  abstract =     "A crucial design factor for users of smart mobile
                 devices is the latency of graphical interface
                 interaction. Switching a background app to foreground
                 is a frequent \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "140",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Majumdar:2023:NAB,
  author =       "Rupak Majumdar and Mahmoud Salamati and Sadegh
                 Soudjani",
  title =        "Neural Abstraction-Based Controller Synthesis and
                 Deployment",
  journal =      j-TECS,
  volume =       "22",
  number =       "5s",
  pages =        "141:1--141:??",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608104",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Sep 18 08:59:39 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608104",
  abstract =     "Abstraction-based techniques are an attractive
                 approach for synthesizing correct-by-construction
                 controllers to satisfy high-level temporal
                 requirements. A \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "141",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Khan:2023:DDL,
  author       = {Osama Khan and Gwanjong Park and Euiseong Seo},
  title        = {{DaCapo}: an On-Device Learning Scheme for
                 Memory-Constrained Embedded Systems},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {142:1--142:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609121},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609121},
  abstract     = {The use of deep neural network (DNN) applications in
                 microcontroller unit (MCU) embedded systems is getting
                 popular. However, the DNN models in such \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {142},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Gunzel:2023:PRT,
  author       = {Mario G{\"u}nzel and Niklas Ueter and Kuan-Hsun Chen
                 and Georg von der Br{\"u}ggen and Jian-Jia Chen},
  title        = {Probabilistic Reaction Time Analysis},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {143:1--143:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609390},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609390},
  abstract     = {In many embedded systems, for instance, in the
                 automotive, avionic, or robotics domain, critical
                 functionalities are implemented via chains of
                 communicating recurrent \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {143},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Vreman:2023:SAC,
  author       = {Nils Vreman and Martina Maggio},
  title        = {Stochastic Analysis of Control Systems Subject to
                 Communication and Computation Faults},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {144:1--144:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609123},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609123},
  abstract     = {Control theory allows one to design controllers that
                 are robust to external disturbances, model
                 simplification, and modelling inaccuracy. Researchers
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {144},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Zheng:2023:IIA,
  author       = {Yongchun Zheng and Changlong Li and Yi Xiong and
                 Weihong Liu and Cheng Ji and Zongwei Zhu and Lichen
                 Yu},
  title        = {{iAware}: Interaction Aware Task Scheduling for
                 Reducing Resource Contention in Mobile Systems},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {145:1--145:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609391},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609391},
  abstract     = {To ensure the user experience of mobile systems, the
                 foreground application can be differentiated to
                 minimize the impact of background applications.
                 However, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {145},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Zhao:2023:FSN,
  author       = {Hanrui Zhao and Niuniu Qi and Lydia Dehbi and Xia Zeng
                 and Zhengfeng Yang},
  title        = {Formal Synthesis of Neural Barrier Certificates for
                 Continuous Systems via Counterexample Guided Learning},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {146:1--146:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609125},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609125},
  abstract     = {This paper presents a novel approach to safety
                 verification based on neural barrier certificates
                 synthesis for continuous dynamical systems. We
                 construct \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {146},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Loveless:2023:CML,
  author       = {Andrew Loveless and Linh Thi Xuan Phan and Lisa
                 Erickson and Ronald Dreslinski and Baris Kasikci},
  title        = {{CrossTalk}: Making Low-Latency Fault Tolerance Cheap
                 by Exploiting Redundant Networks},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {147:1--147:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609436},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609436},
  abstract     = {Real-time embedded systems perform many important
                 functions in the modern world. A standard way to
                 tolerate faults in these systems is with Byzantine
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {147},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Shijubo:2023:PBB,
  author       = {Junya Shijubo and Masaki Waga and Kohei Suenaga},
  title        = {Probabilistic Black-Box Checking via Active {MDP}
                 Learning},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {148:1--148:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609127},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609127},
  abstract     = {We introduce a novel methodology for testing
                 stochastic black-box systems, frequently encountered in
                 embedded systems. Our approach enhances \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {148},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Singh:2023:KWC,
  author       = {Nikhilesh Singh and Karthikeyan Renganathan and
                 Chester Rebeiro and Jithin Jose and Ralph Mader},
  title        = {{Kryptonite}: Worst-Case Program Interference
                 Estimation on Multi-Core Embedded Systems},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {149:1--149:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609128},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609128},
  abstract     = {Due to the low costs and energy needed, cyber-physical
                 systems are adopting multi-core processors for their
                 embedded computing requirements. In order \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {149},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Baruah:2023:OSR,
  author       = {Sanjoy Baruah and Alan Burns and Robert Ian Davis},
  title        = {Optimal Synthesis of Robust {IDK} Classifier
                 Cascades},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {150:1--150:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609129},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609129},
  abstract     = {An IDK classifier is a computing component that
                 categorizes inputs into one of a number of classes, if
                 it is able to do so with the required level of
                 confidence, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {150},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Brun:2023:EDA,
  author       = {L{\'e}lio Brun and Christophe Garion and
                 Pierre-Lo{\"\i}c Garoche and Xavier Thirioux},
  title        = {Equation-Directed Axiomatization of {Lustre} Semantics
                 to Enable Optimized Code Validation},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {151:1--151:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609393},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609393},
  abstract     = {Model-based design tools like SCADE Suite and Simulink
                 are often used to design safety-critical embedded
                 software. Consequently, generating correct \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {151},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Colaco:2023:CSB,
  author       = {Jean-Louis Cola{\c{c}}o and Michael Mendler and
                 Baptiste Pauget and Marc Pouzet},
  title        = {A Constructive State-based Semantics and Interpreter
                 for a Synchronous Data-flow Language with State
                 Machines},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {152:1--152:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609131},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609131},
  abstract     = {Scade is a domain-specific synchronous functional
                 language used to implement safety-critical real-time
                 software for more than twenty years. Two main
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {152},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Thilakasiri:2023:MRP,
  author       = {Thilanka Thilakasiri and Matthias Becker},
  title        = {Methods to Realize Preemption in Phased Execution
                 Models},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {153:1--153:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609132},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609132},
  abstract     = {Phased execution models are a good solution to tame
                 the increased complexity and contention of commercial
                 off-the-shelf (COTS) multi-core platforms, e.g.,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {153},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Szeto:2023:BAB,
  author       = {Matthew Szeto and Edward Andert and Aviral Shrivastava
                 and Martin Reisslein and Chung-Wei Lin and Christ
                 Richmond},
  title        = {{B-AWARE}: Blockage Aware {RSU} Scheduling for {5G}
                 Enabled Autonomous Vehicles},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {154:1--154:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609133},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609133},
  abstract     = {5G Millimeter Wave (mmWave) technology holds great
                 promise for Connected Autonomous Vehicles (CAVs) due to
                 its ability to achieve data rates in the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {154},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Lin:2023:TBV,
  author       = {Shaokai Lin and Yatin A. Manerkar and Marten Lohstroh
                 and Elizabeth Polgreen and Sheng-Jung Yu and Chadlia
                 Jerad and Edward A. Lee and Sanjit A. Seshia},
  title        = {Towards Building Verifiable {CPS} using {Lingua
                 Franca}},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {155:1--155:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609134},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609134},
  abstract     = {Formal verification of cyber-physical systems (CPS) is
                 challenging because it has to consider real-time and
                 concurrency aspects that are often absent in \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {155},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Bartocci:2023:MHU,
  author       = {Ezio Bartocci and Cristinel Mateis and Eleonora
                 Nesterini and Dejan Ni{\v{c}}kovi{\'c}},
  title        = {Mining Hyperproperties using Temporal Logics},
  journal      = j-TECS,
  volume       = {22},
  number       = {5s},
  pages        = {156:1--156:??},
  month        = oct,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609394},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Sep 18 08:59:39 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609394},
  abstract     = {Formal specifications are essential to express
                 precisely systems, but they are often difficult to
                 define or unavailable. Specification mining aims to
                 automatically infer \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {156},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}


@Article{Metz:2023:BBS,
  author =       "David Metz and Vineet Kumar and Magnus Sj{\"a}lander",
  title =        "{BISDU}: a Bit-Serial Dot-Product Unit for
                 Microcontrollers",
  journal =      j-TECS,
  volume =       "22",
  number =       "5",
  pages =        "79:1--79:??",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608447",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 2 15:31:58 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 https://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608447",
  abstract =     "Low-precision quantized neural networks (QNNs) reduce
                 the required memory space, bandwidth, and computational
                 power, and hence are suitable for deployment in
                 applications such as IoT edge devices. Mixed-precision
                 QNNs, where weights commonly have lower precision than
                 activations or different precision is used for
                 different layers, can limit the accuracy loss caused by
                 low-bit quantization, while still benefiting from
                 reduced memory footprint and faster execution. Previous
                 multiple-precision functional units supporting 8-bit,
                 4-bit, and 2-bit SIMD instructions have limitations,
                 such as large area overhead, under-utilization of
                 multipliers, and wasted memory space for low and mixed
                 bit-width operations.\par

                 This article introduces BISDU, a bit-serial dot-product
                 unit to support and accelerate execution of
                 mixed-precision low-bit QNNs on resource-constrained
                 microcontrollers. BISDU is a multiplier-less
                 dot-product unit, with frugal hardware requirements (a
                 population count unit and 2:1 multiplexers). The
                 proposed bit-serial dot-product unit leverages the
                 conventional logical operations of a microcontroller to
                 perform multiplications, which enables efficient
                 software implementations of binary (Xnor), ternary
                 (Xor), and mixed-precision [W $ \times $ A] (And)
                 dot-product operations.\par

                 The experimental results show that BISDU achieves
                 competitive performance compared to two
                 state-of-the-art units, XpulpNN and Dustin, when
                 executing low-bit-width CNNs. We demonstrate the
                 advantage that bit-serial execution provides by
                 enabling trading accuracy against weight footprint and
                 execution time. BISDU increases the area of the ALU by
                 68\% and the ALU power consumption by 42\% compared to
                 a baseline 32-bit RISC-V (RV32IC) microcontroller core.
                 In comparison, XpulpNN and Dustin increase the area by
                 6.9$ \times $ and 11.1$ \times $ and the power
                 consumption by 3.8$ \times $ and 5.97$ \times $,
                 respectively. The bit-serial state-of-the-art, based on
                 a conventional popcount instruction, increases the area
                 by 42\% and power by 32\%, with BISDU providing a 37\%
                 speedup over it.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "79",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Kwon:2023:LRT,
  author       = {Hyeokdong Kwon and Hyunjun Kim and Minjoo Sim and
                 Wai-Kong Lee and Hwajeong Seo},
  title        = {Look-up the Rainbow: Table-based Implementation of
                 Rainbow Signature on 64-bit {ARMv8} Processors},
  journal      = j-TECS,
  volume       = {22},
  number       = {5},
  pages        = {80:1--80:??},
  month        = sep,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3607140},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Oct 2 15:31:58 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3607140},
  abstract     = {The Rainbow Signature Scheme is one of the finalists
                 in the National Institute of Standards and Technology
                 (NIST) Post-Quantum Cryptography (PQC) standardization
                 competition, but failed to win because it has lack of
                 stability in the parameter selection \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {80},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Schneider:2023:CCM,
  author       = {Klaus Schneider and Anoop Bhagyanath},
  title        = {Consistency Constraints for Mapping Dataflow Graphs to
                 Hybrid Dataflow\slash {von Neumann} Architectures},
  journal      = j-TECS,
  volume       = {22},
  number       = {5},
  pages        = {81:1--81:??},
  month        = sep,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3607869},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Oct 2 15:31:58 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3607869},
  abstract     = {Dataflow process networks (DPNs) provide a convenient
                 model of computation that is often used to model system
                 behavior in model-based designs. With fixed sets of
                 nodes, they are also used as dataflow graphs as an
                 intermediate program representation by \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {81},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Caronti:2023:FGH,
  author       = {Luca Caronti and Khakim Akhunov and Matteo Nardello
                 and Kasim Sinan Yildirim and Davide Brunelli},
  title        = {Fine-grained Hardware Acceleration for Efficient
                 Batteryless Intermittent Inference on the Edge},
  journal      = j-TECS,
  volume       = {22},
  number       = {5},
  pages        = {82:1--82:??},
  month        = sep,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3608475},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Oct 2 15:31:58 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3608475},
  abstract     = {Backing up the intermediate results of
                 hardware-accelerated deep inference is crucial to
                 ensure the progress of execution on batteryless
                 computing platforms. However, hardware accelerators in
                 low-power AI platforms only support the one-shot atomic
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {82},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Lei:2023:FII,
  author       = {Douwei Lei and Debiao He and Cong Peng and Min Luo and
                 Zhe Liu and Xinyi Huang},
  title        = {Faster Implementation of Ideal Lattice-Based
                 Cryptography Using {AVX512}},
  journal      = j-TECS,
  volume       = {22},
  number       = {5},
  pages        = {83:1--83:??},
  month        = sep,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609223},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Oct 2 15:31:58 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609223},
  abstract     = {With the development of quantum computing, the
                 existing cryptography schemes based on classical
                 cryptographic primitives will no longer be secure.
                 Hence, cryptographers are designing post-quantum
                 cryptographic (PQC) schemes, and ideal lattice-based
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {83},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Chen:2023:RCR,
  author       = {Wei-Ju Chen and Peng Wu and Pei-Chi Huang and Aloysius
                 K. Mok and Song Han},
  title        = {Regular Composite Resource Partitioning and
                 Reconfiguration in Open Systems},
  journal      = j-TECS,
  volume       = {22},
  number       = {5},
  pages        = {84:1--84:??},
  month        = sep,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609424},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Oct 2 15:31:58 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609424},
  abstract     = {We consider the problem of resource provisioning for
                 real-time cyber-physical applications in an open system
                 environment where there does not exist a global
                 resource scheduler that has complete knowledge of the
                 real-time performance requirements of each \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {84},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Inagaki:2023:PSC,
  author       = {Saya Inagaki and Mingyu Yang and Yang Li and Kazuo
                 Sakiyama and Yuko Hara-Azumi},
  title        = {Power Side-channel Attack Resistant Circuit Designs of
                 {ARX} Ciphers Using High-level Synthesis},
  journal      = j-TECS,
  volume       = {22},
  number       = {5},
  pages        = {85:1--85:??},
  month        = sep,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3609507},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Oct 2 15:31:58 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3609507},
  abstract     = {In the Internet of Things (IoT) era, edge devices have
                 been considerably diversified and are often designed
                 using high-level synthesis (HLS) for improved design
                 productivity. However, HLS tools were originally
                 developed in a security-unaware manner, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {85},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

@article{Luo:2023:SEF,
  author       = {Yuling Luo and Shiqi Zhang and Shunsheng Zhang and
                 Junxiu Liu and Yanhu Wang and Su Yang},
  title        = {A Secure and Efficient Framework for Outsourcing
                 Large-scale Matrix Determinant and Linear Equations},
  journal      = j-TECS,
  volume       = {22},
  number       = {5},
  pages        = {86:1--86:??},
  month        = sep,
  year         = {2023},
  coden        = {????},
  doi          = {https://doi.org/10.1145/3611014},
  issn         = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l       = {1539-9087},
  bibdate      = {Mon Oct 2 15:31:58 MDT 2023},
  bibsource    = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  url          = {https://dl.acm.org/doi/10.1145/3611014},
  abstract     = {Large-scale matrix determinants and linear equations
                 are two basic computational tools in science and
                 engineering fields. However, it is difficult for a
                 resource-constrained client to solve large-scale
                 computational tasks. Cloud computing service provides
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal     = {ACM Trans. Embed. Comput. Syst.},
  articleno    = {86},
  fjournal     = {ACM Transactions on Embedded Computing Systems},
  journal-url  = {https://dl.acm.org/loi/tecs},
}

%%% NOTE: the first author's family name appears to be the compound
%%% surname ``Li Calsi'' (confirm against the publisher's author
%%% listing); it is braced so that BibTeX treats it as one surname
%%% instead of parsing ``Li'' as a given name.  The citation label is
%%% intentionally left unchanged so that existing citations still resolve.
@Article{Calsi:2023:IRA,
  author =       "Davide {Li Calsi} and Vittorio Zaccaria",
  title =        "Interruptible Remote Attestation of Low-end {IoT}
                 Microcontrollers via Performance Counters",
  journal =      j-TECS,
  volume =       "22",
  number =       "5",
  pages =        "87:1--87:??",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3611674",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 2 15:31:58 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3611674",
  abstract =     "Remote attestation is a method used in distributed
                 systems to detect integrity violations on a target
                 device (prover) through a challenge-response protocol
                 initiated by a verifier device. The prover calculates a
                 hash of its memory, which is compared to a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "87",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Indrusiak:2023:RTG,
  author          = {Leandro Soares Indrusiak and Alan Burns},
  title           = {Real-Time Guarantees in Routerless Networks-on-Chip},
  journal         = j-TECS,
  volume          = {22},
  number          = {5},
  pages           = {88:1--88:??},
  articleno       = {88},
  month           = sep,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3616539},
  url             = {https://dl.acm.org/doi/10.1145/3616539},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {This article considers the use of routerless
                     networks-on-chip as an alternative on-chip
                     interconnect for multi-processor systems requiring
                     hard real-time guarantees for inter-processor
                     communication. It presents a novel analytical
                     framework that can \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Mon Oct 2 15:31:58 MDT 2023},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Liang:2023:SIA,
  author          = {Yun (Eric) Liang and Wei Zhang and
                     Stephen Neuendorffer and Wayne Luk},
  title           = {Special Issue: {``AI Acceleration on FPGAs''}},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {89:1--89:??},
  articleno       = {89},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3626323},
  url             = {https://dl.acm.org/doi/10.1145/3626323},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Hu:2023:HPR,
  author          = {Xianghong Hu and Hongmin Huang and Xueming Li and
                     Xin Zheng and Qinyuan Ren and Jingyu He and
                     Xiaoming Xiong},
  title           = {High-performance Reconfigurable {DNN} Accelerator
                     on a Bandwidth-limited Embedded System},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {90:1--90:??},
  articleno       = {90},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3530818},
  url             = {https://dl.acm.org/doi/10.1145/3530818},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Deep convolutional neural networks (DNNs) have
                     been widely used in many applications,
                     particularly in machine vision. It is challenging
                     to accelerate DNNs on embedded systems because
                     real-world machine vision applications should
                     reserve a lot of external \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Wang:2023:FCF,
  author          = {Xiaoyang Wang and Zhe Zhou and Zhihang Yuan and
                     Jingchen Zhu and Yulong Cao and Yao Zhang and
                     Kangrui Sun and Guangyu Sun},
  title           = {{FD-CNN}: a Frequency-Domain {FPGA} Acceleration
                     Scheme for {CNN}-Based Image-Processing
                     Applications},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {91:1--91:??},
  articleno       = {91},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3559105},
  url             = {https://dl.acm.org/doi/10.1145/3559105},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {In the emerging edge-computing scenarios, FPGAs
                     have been widely adopted to accelerate
                     convolutional neural network (CNN)-based
                     image-processing applications, such as image
                     classification, object detection, and image
                     segmentation, and so on. A standard \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ma:2023:ICD,
  author =       "Zhengzheng Ma and Tuo Dai and Xuechao Wei and Guojie
                 Luo",
  title =        "An Intermediate-Centric Dataflow for Transposed
                 Convolution Acceleration on {FPGA}",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "92:1--92:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3561053",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3561053",
  abstract =     "Transposed convolution has been prevailing in
                 convolutional neural networks (CNNs), playing an
                 important role in multiple scenarios such as image
                 segmentation and back-propagation process of training
                 CNNs. This mainly benefits from the ability to up-
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "92",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Ye:2023:AAM,
  author          = {Wenhua Ye and Xu Zhou and Joey Zhou and Cen Chen
                     and Kenli Li},
  title           = {Accelerating Attention Mechanism on {FPGAs} based
                     on Efficient Reconfigurable Systolic Array},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {93:1--93:??},
  articleno       = {93},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3549937},
  url             = {https://dl.acm.org/doi/10.1145/3549937},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Transformer model architectures have recently
                     received great interest in natural language,
                     machine translation, and computer vision, where
                     attention mechanisms are their building blocks.
                     However, the attention mechanism is expensive
                     because of its \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Alam:2023:RIF,
  author          = {Syed Asad Alam and David Gregg and
                     Giulio Gambardella and Thomas Preusser and
                     Michaela Blott},
  title           = {On the {RTL} Implementation of {FINN} Matrix
                     Vector Unit},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {94:1--94:??},
  articleno       = {94},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3547141},
  url             = {https://dl.acm.org/doi/10.1145/3547141},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Field-programmable gate array (FPGA)-based
                     accelerators are becoming increasingly popular for
                     deep neural network (DNN) inference due to their
                     ability to scale performance with increasing
                     degrees of specialization with dataflow
                     architectures or custom \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Feng:2023:ADS,
  author          = {Kaijie Feng and Xiaoya Fan and Jianfeng An and
                     Chuxi Li and Kaiyue Di and Jiangfei Li},
  title           = {{ACDSE}: a Design Space Exploration Method for
                     {CNN} Accelerator based on Adaptive Compression
                     Mechanism},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {95:1--95:??},
  articleno       = {95},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3545177},
  url             = {https://dl.acm.org/doi/10.1145/3545177},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Customized accelerators for Convolutional Neural
                     Network (CNN) can achieve better energy efficiency
                     than general computing platforms. However, the
                     design of a high-performance accelerator should
                     take into account a variety of parameters and
                     physical \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Shu:2023:TID,
  author =       "Jiwu Shu and Kedong Fang and Youmin Chen and Shuo
                 Wang",
  title =        "{TH-iSSD}: Design and Implementation of a Generic and
                 Reconfigurable Near-Data Processing Framework",
  journal =      j-TECS,
  volume =       "22",
  number =       "6",
  pages =        "96:1--96:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3563456",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563456",
  abstract =     "We present the design and implementation of TH-iSSD, a
                 near-data processing framework to address the data
                 movement problem. TH-iSSD does not pose any restriction
                 to the hardware selection and is highly
                 reconfigurable---its core components, such as the on-
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "96",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Fu:2023:RRB,
  author          = {Yu Fu and Jingqiang Lin and Dengguo Feng and
                     Wei Wang and Mingyu Wang and Wenjie Wang},
  title           = {{RegKey}: a Register-based Implementation of {ECC}
                     Signature Algorithms Against One-shot Memory
                     Disclosure},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {97:1--97:??},
  articleno       = {97},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3604805},
  url             = {https://dl.acm.org/doi/10.1145/3604805},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {To ensure the security of cryptographic algorithm
                     implementations, several cryptographic key
                     protection schemes have been proposed to prevent
                     various memory disclosure attacks. Among them, the
                     register-based solutions do not rely on special
                     hardware \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Min:2023:SBM,
  author          = {Chulhong Min and Akhil Mathur and
                     Utku G{\"u}nay Acer and Alessandro Montanari and
                     Fahim Kawsar},
  title           = {{SensiX++}: Bringing {MLOps} and Multi-tenant
                     Model Serving to Sensory Edge Devices},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {98:1--98:??},
  articleno       = {98},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3617507},
  url             = {https://dl.acm.org/doi/10.1145/3617507},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {We present SensiX++, a multi-tenant runtime for
                     adaptive model execution with integrated MLOps on
                     edge devices, e.g., a camera, a microphone, or IoT
                     sensors. SensiX++ operates on two fundamental
                     principles: highly modular componentisation to
                     externalise \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{ElYaacoub:2023:SDS,
  author          = {Ahmed {El Yaacoub} and Luca Mottola and
                     Thiemo Voigt and Philipp R{\"u}mmer},
  title           = {Scheduling Dynamic Software Updates in Mobile
                     Robots},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {99:1--99:??},
  articleno       = {99},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3623676},
  url             = {https://dl.acm.org/doi/10.1145/3623676},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {We present NeRTA (Next Release Time Analysis), a
                     technique to enable dynamic software updates for
                     low-level control software of mobile robots.
                     Dynamic software updates enable software
                     correction and evolution during system operation.
                     In mobile robotics, \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Samaddar:2023:ODS,
  author          = {Ankita Samaddar and Arvind Easwaran},
  title           = {Online Distributed Schedule Randomization to
                     Mitigate Timing Attacks in Industrial Control
                     Systems},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {100:1--100:??},
  articleno       = {100},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3624584},
  url             = {https://dl.acm.org/doi/10.1145/3624584},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Industrial control systems (ICSs) consist of a
                     large number of control applications that are
                     associated with periodic real-time flows with hard
                     deadlines. To facilitate large-scale integration,
                     remote control, and co-ordination, wireless sensor
                     and \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Wu:2023:SFA,
  author          = {Jun-Shen Wu and Tsen-Wei Hsu and Ren-Shuo Liu},
  title           = {{SG-Float}: Achieving Memory Access and Computing
                     Power Reduction Using Self-Gating Float in {CNNs}},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {101:1--101:??},
  articleno       = {101},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3624582},
  url             = {https://dl.acm.org/doi/10.1145/3624582},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Convolutional neural networks (CNNs) are essential
                     for advancing the field of artificial
                     intelligence. However, since these networks are
                     highly demanding in terms of memory and
                     computation, implementing CNNs can be challenging.
                     To make CNNs more \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Hung:2023:EEC,
  author          = {Chen-Tui Hung and Kai Xuan Lee and Yi-Zheng Liu
                     and Ya-Shu Chen and Zhong-Han Chan},
  title           = {Energy-Efficient Communications for Improving
                     Timely Progress of Intermittent-Powered {BLE}
                     Devices},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {102:1--102:??},
  articleno       = {102},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3626197},
  url             = {https://dl.acm.org/doi/10.1145/3626197},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Battery-less devices offer potential solutions for
                     maintaining sustainable Internet of Things (IoT)
                     networks. However, limited energy harvesting
                     capacity can lead to power failures, limiting the
                     system's quality of service (QoS). To improve
                     timely task \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Javadi:2023:CME,
  author          = {Mohammad Haji Seyed Javadi and Mohsen Faryabi and
                     Hamid Reza Mahdiani},
  title           = {A Comprehensive Model for Efficient Design Space
                     Exploration of Imprecise Computational Blocks},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {103:1--103:??},
  articleno       = {103},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3625555},
  url             = {https://dl.acm.org/doi/10.1145/3625555},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {After almost a decade of research, development of
                     more efficient imprecise computational blocks is
                     still a major concern in imprecise computing
                     domain. There are many instances of the introduced
                     imprecise components of different types, while
                     their main \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Siddhu:2023:DTM,
  author          = {Lokesh Siddhu and Aritra Bagchi and Rajesh Kedia
                     and Isaar Ahmad and Shailja Pandey and
                     Preeti Ranjan Panda},
  title           = {Dynamic Thermal Management of {$3$D} Memory
                     through Rotating Low Power States and Partial
                     Channel Closure},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {104:1--104:??},
  articleno       = {104},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3624581},
  url             = {https://dl.acm.org/doi/10.1145/3624581},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Modern high-performance and high-bandwidth
                     three-dimensional (3D) memories are characterized
                     by frequent heating. Prior art suggests turning
                     off hot channels and migrating data to the
                     background DDR memory, incurring significant
                     performance and energy \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Wang:2023:EBN,
  author          = {Erwei Wang and James J. Davis and Daniele Moro and
                     Piotr Zielinski and Jia Jie Lim and
                     Claudionor Coelho and Satrajit Chatterjee and
                     Peter Y. K. Cheung and George A. Constantinides},
  title           = {Enabling Binary Neural Network Training on the
                     Edge},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {105:1--105:??},
  articleno       = {105},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3626100},
  url             = {https://dl.acm.org/doi/10.1145/3626100},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {The ever-growing computational demands of
                     increasingly complex machine learning models
                     frequently necessitate the use of powerful
                     cloud-based infrastructure for their training.
                     Binary neural networks are known to be promising
                     candidates for on-device \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Farahmand:2023:DAH,
  author          = {Ebrahim Farahmand and Ali Mahani and
                     Muhammad Abdullah Hanif and Muhammad Shafique},
  title           = {Design and Analysis of High Performance
                     Heterogeneous Block-based Approximate Adders},
  journal         = j-TECS,
  volume          = {22},
  number          = {6},
  pages           = {106:1--106:??},
  articleno       = {106},
  month           = nov,
  year            = {2023},
  doi             = {https://doi.org/10.1145/3625686},
  url             = {https://dl.acm.org/doi/10.1145/3625686},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Approximate computing is an emerging paradigm to
                     improve the power and performance efficiency of
                     error-resilient applications. As adders are one of
                     the key components in almost all processing
                     systems, a significant amount of research has been
                     carried out \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Casini:2024:ISI,
  author          = {Daniel Casini and Dakshina Dasari and
                     Matthias Becker and Giorgio Buttazzo},
  title           = {Introduction to the Special Issue on Real-Time
                     Computing in the {IoT}-to-Edge-to-Cloud Continuum},
  journal         = j-TECS,
  volume          = {23},
  number          = {1},
  pages           = {1:1--1:??},
  articleno       = {1},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3605180},
  url             = {https://dl.acm.org/doi/10.1145/3605180},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:48 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Chen:2024:DTO,
  author          = {Ying Chen and Jie Zhao and Jintao Hu and
                     Shaohua Wan and Jiwei Huang},
  title           = {Distributed Task Offloading and Resource
                     Purchasing in {NOMA-Enabled} Mobile Edge
                     Computing: Hierarchical Game Theoretical
                     Approaches},
  journal         = j-TECS,
  volume          = {23},
  number          = {1},
  pages           = {2:1--2:??},
  articleno       = {2},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3597023},
  url             = {https://dl.acm.org/doi/10.1145/3597023},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {As the computing resources and the battery
                     capacity of mobile devices are usually limited, it
                     is a feasible solution to offload the
                     computation-intensive tasks generated by mobile
                     devices to edge servers (ESs) in mobile edge
                     computing (MEC). In this \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:48 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Cucinotta:2024:MCO,
  author =       "Tommaso Cucinotta and Alexandre Amory and Gabriele Ara
                 and Francesco Paladino and Marco {Di Natale}",
  title =        "Multi-criteria Optimization of Real-time {DAGs} on
                 Heterogeneous Platforms under {P-EDF}",
  journal =      j-TECS,
  volume =       "23",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3592609",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Feb 3 11:10:48 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3592609",
  abstract =     "This article tackles the problem of optimal placement
                 of complex real-time embedded applications on
                 heterogeneous platforms. Applications are composed of
                 directed acyclic graphs of tasks, with each
                 directed-acyclic-graph (DAG) having a minimum inter-
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@article{Struhar:2024:HRO,
  author          = {V{\'a}clav Struh{\'a}r and Silviu S. Craciunas and
                     Mohammad Ashjaei and Moris Behnam and
                     Alessandro V. Papadopoulos},
  title           = {Hierarchical Resource Orchestration Framework for
                     Real-time Containers},
  journal         = j-TECS,
  volume          = {23},
  number          = {1},
  pages           = {4:1--4:??},
  articleno       = {4},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3592856},
  url             = {https://dl.acm.org/doi/10.1145/3592856},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {Container-based virtualization is a promising
                     deployment model in fog and edge computing
                     applications, because it allows a seamless
                     co-existence of virtualized applications in a
                     heterogeneous environment without introducing
                     significant overhead. Certain \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:48 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib;
                     https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib},
  acknowledgement = ack-nhfb,
}

@article{Barletta:2024:CAM,
  author          = {Marco Barletta and Marcello Cinque and
                     Luigi {De Simone} and Raffaele {Della Corte}},
  title           = {Criticality-aware Monitoring and Orchestration for
                     Containerized Industry 4.0 Environments},
  journal         = j-TECS,
  volume          = {23},
  number          = {1},
  pages           = {5:1--5:??},
  articleno       = {5},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3604567},
  url             = {https://dl.acm.org/doi/10.1145/3604567},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  coden           = {????},
  abstract        = {The evolution of industrial environments makes the
                     reconfigurability and flexibility key requirements
                     to rapidly adapt to changeable market needs.
                     Computing paradigms like Edge/Fog computing are
                     able to provide the required flexibility and
                     scalability \ldots{}},
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {https://dl.acm.org/loi/tecs},
  bibdate         = {Sat Feb 3 11:10:48 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ghosh:2024:PPA,
  author = {Soumendu Kumar Ghosh and Arnab Raha and Vijay Raghunathan
    and Anand Raghunathan},
  title = {{PArtNNer}: Platform-Agnostic Adaptive Edge-Cloud {DNN}
    Partitioning for Minimizing End-to-End Latency},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {6},
  pages = {6:1--6:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3630266},
  URL = {https://dl.acm.org/doi/10.1145/3630266},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {The last decade has seen the emergence of Deep Neural
    Networks (DNNs) as the de facto algorithm for various computer
    vision applications. In intelligent edge devices, sensor data
    streams acquired by the device are processed by a DNN application
    running on \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Xu:2024:SLB,
  author = {Haitao Xu and Saiyu Qi and Yong Qi and Wei Wei and Naixue
    Xiong},
  title = {Secure and Lightweight Blockchain-based Truthful Data
    Trading for Real-Time Vehicular Crowdsensing},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {7},
  pages = {7:1--7:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3582008},
  URL = {https://dl.acm.org/doi/10.1145/3582008},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {As the number of smart cars grows rapidly, vehicular
    crowdsensing (VCS) is gradually becoming popular. In a VCS
    infrastructure, sensing devices and computing units hold on smart
    cars as well as cloud servers form an IoT-edge-cloud continuum to
    perform \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Oza:2024:DAT,
  author = {Pratham Oza and Nathaniel Hudson and Thidapat Chantem and
    Hana Khamfroush},
  title = {Deadline-Aware Task Offloading for Vehicular Edge Computing
    Networks Using Traffic Light Data},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {8},
  pages = {8:1--8:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3594541},
  URL = {https://dl.acm.org/doi/10.1145/3594541},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {As vehicles have become increasingly automated, novel
    vehicular applications have emerged to enhance the safety and
    security of the vehicles and improve user experience. This brings
    ever-increasing data and resource requirements for timely
    computation by \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Gaitan:2024:MOC,
  author = {Miguel Guti{\'e}rrez Gait{\'a}n and Lu{\'\i}s Almeida and
    Pedro M. D'orey and Pedro M. Santos and Thomas Watteyne},
  title = {Minimal-Overlap Centrality for Multi-Gateway Designation in
    Real-Time {TSCH} Networks},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {9},
  pages = {9:1--9:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3610583},
  URL = {https://dl.acm.org/doi/10.1145/3610583},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {This article presents a novel centrality-driven gateway
    designation framework for the improved real-time performance of
    low-power wireless sensor networks (WSNs) at system design time. We
    target time-synchronized channel hopping (TSCH) WSNs with
    \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Iyer:2024:HCM,
  author = {Vishnuvardhan V. Iyer and Aditya Thimmaiah and Michael
    Orshansky and Andreas Gerstlauer and Ali E. Yilmaz},
  title = {A Hierarchical Classification Method for High-accuracy
    Instruction Disassembly with Near-field {EM} Measurements},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {10},
  pages = {10:1--10:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3629167},
  URL = {https://dl.acm.org/doi/10.1145/3629167},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Electromagnetic (EM) fields have been extensively studied
    as potent side-channel tools for testing the security of hardware
    implementations. In this work, a low-cost side-channel disassembler
    that uses fine-grained EM signals to predict a program's
    \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhang:2024:EAA,
  author = {Yi-Wen Zhang and Hui Zheng and Zonghua Gu},
  title = {Energy-Aware Adaptive Mixed-Criticality Scheduling with
    Semi-Clairvoyance and Graceful Degradation},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {11},
  pages = {11:1--11:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3632749},
  URL = {https://dl.acm.org/doi/10.1145/3632749},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {The classic Mixed-Criticality System (MCS) task model is
    a non-clairvoyance model in which the change of the system behavior
    is based on the completion of high-criticality tasks while dropping
    low-criticality tasks in high-criticality mode. In this paper,
    \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Bagchi:2024:CCA,
  author = {Aritra Bagchi and Dinesh Joshi and Preeti Ranjan Panda},
  title = {{COBRRA}: {COntention-aware} cache Bypass with
    Request-Response Arbitration},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {12},
  pages = {12:1--12:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3632748},
  URL = {https://dl.acm.org/doi/10.1145/3632748},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {In modern multi-processor systems-on-chip (MPSoCs),
    requests from different processor cores, accelerators, and their
    responses from the lower-level memory contend for the shared cache
    bandwidth, making it a critical performance bottleneck. Prior
    research \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Shin:2024:VEM,
  author = {Yong-Jun Shin and Donghwan Shin and Doo-Hwan Bae},
  title = {Virtual Environment Model Generation for {CPS} Goal
    Verification using Imitation Learning},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {13},
  pages = {13:1--13:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3633804},
  URL = {https://dl.acm.org/doi/10.1145/3633804},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Cyber-Physical Systems (CPS) continuously interact with
    their physical environments through embedded software controllers
    that observe the environments and determine actions. Field
    Operational Tests (FOT) are essential to verify to what extent the
    CPS \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Yu:2024:MAE,
  author = {Wangyang Yu and Jinming Kong and Zhijun Ding and Xiaojun
    Zhai and Zhiqiang Li and Qi Guo},
  title = {Modeling and Analysis of {ETC} Control System with Colored
    {Petri} Net and Dynamic Slicing},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {14},
  pages = {14:1--14:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3633450},
  URL = {https://dl.acm.org/doi/10.1145/3633450},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Nowadays, Electronic Toll Collection (ETC) control
    systems have been widely adopted to smoothen traffic flow on
    highways. However, as it is a complex business interaction system,
    there are inevitably flaws in its control logic process, such as
    the problem \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{He:2024:REA,
  author = {Zhijian He and Bohuan Xue and Xiangcheng Hu and Zhaoyan
    Shen and Xiangyue Zeng and Ming Liu},
  title = {Robust Embedded Autonomous Driving Positioning System Fusing
    {LiDAR} and Inertial Sensors},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {15},
  pages = {15:1--15:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3626098},
  URL = {https://dl.acm.org/doi/10.1145/3626098},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Autonomous driving emphasizes precise multi-sensor fusion
    positioning on limit resource embedded systems. LiDAR-centered
    sensor fusion system serves as a mainstream navigation system due
    to its insensitivity to illumination and viewpoint change. However,
    \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Qi:2024:MCS,
  author = {Huamei Qi and Fang Ren and Leilei Wang and Ping Jiang and
    Shaohua Wan and Xiaoheng Deng},
  title = {Multi-Compression Scale {DNN} Inference Acceleration based
    on Cloud-Edge-End Collaboration},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {1},
  articleno = {16},
  pages = {16:1--16:??},
  month = jan,
  year = {2024},
  DOI = {https://doi.org/10.1145/3634704},
  URL = {https://dl.acm.org/doi/10.1145/3634704},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Edge intelligence has emerged as a promising paradigm to
    accelerate DNN inference by model partitioning, which is
    particularly useful for intelligent scenarios that demand high
    accuracy and low latency. However, the dynamic nature of the edge
    environment \ldots{}},
  bibdate = {Sat Feb 3 11:10:48 MST 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Que:2024:LGL,
  author = {Zhiqiang Que and Hongxiang Fan and Marcus Loo and He Li and
    Michaela Blott and Maurizio Pierini and Alexander Tapper and Wayne
    Luk},
  title = {{LL-GNN}: Low Latency Graph Neural Networks on {FPGAs} for
    High Energy Physics},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {17},
  pages = {17:1--17:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3640464},
  URL = {https://dl.acm.org/doi/10.1145/3640464},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {This work presents a novel reconfigurable architecture
    for Low Latency Graph Neural Network (LL-GNN) designs for particle
    detectors, delivering unprecedented low latency performance.
    Incorporating FPGA-based GNNs into particle detectors presents a
    unique \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Alsubhi:2024:SFE,
  author = {Arwa Alsubhi and Simeon Babatunde and Nicole Tobias and
    Jacob Sorber},
  title = {{Stash}: Flexible Energy Storage for Intermittent Sensors},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {18},
  pages = {18:1--18:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3641511},
  URL = {https://dl.acm.org/doi/10.1145/3641511},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Batteryless sensors promise a sustainable future for
    sensing, but they face significant challenges when storing and
    using environmental energy. Incoming energy can fluctuate
    unpredictably between periods of scarcity and abundance, and device
    performance \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhao:2024:ICV,
  author = {Liang Zhao and Hongxuan Li and Enchao Zhang and Ammar
    Hawbani and Mingwei Lin and Shaohua Wan and Mohsen Guizani},
  title = {Intelligent Caching for Vehicular Dew Computing in Poor
    Network Connectivity Environments},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {19},
  pages = {19:1--19:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3643038},
  URL = {https://dl.acm.org/doi/10.1145/3643038},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {In vehicular networks, some edge servers may not function
    properly due to the time-varying load condition and the uneven
    computing resource distribution, resulting in a low quality of
    caching services. To overcome this challenge, we develop a
    Vehicular \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Sah:2024:ATE,
  author = {Ramesh Kumar Sah and Hassan Ghasemzadeh},
  title = {Adversarial Transferability in Embedded Sensor Systems: an
    Activity Recognition Perspective},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {20},
  pages = {20:1--20:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3641861},
  URL = {https://dl.acm.org/doi/10.1145/3641861},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Machine learning algorithms are increasingly used for
    inference and decision-making in embedded systems. Data from
    sensors are used to train machine learning models for various smart
    functions of embedded and cyber-physical systems ranging from
    \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Kornaros:2024:FUI,
  author = {George Kornaros and Svoronos Leivadaros and Filippos
    Kolimbianakis},
  title = {Flexible Updating of {Internet} of Things Computing
    Functions through Optimizing Dynamic Partial Reconfiguration},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {21},
  pages = {21:1--21:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3643825},
  URL = {https://dl.acm.org/doi/10.1145/3643825},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {With applications to become increasingly compute- and
    data-intensive, requiring more processing power, many Internet of
    Things (IoT) platforms in robots, drones, and autonomous vehicles
    that implement neural network inference, cryptographic functions or
    \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Fatnassi:2024:PNN,
  author = {Wael Fatnassi and Yasser Shoukry},
  title = {{PolyARBerNN}: a Neural Network Guided Solver and Optimizer
    for Bounded Polynomial Inequalities},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {22},
  pages = {22:1--22:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3632970},
  URL = {https://dl.acm.org/doi/10.1145/3632970},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Constraints solvers play a significant role in the
    analysis, synthesis, and formal verification of complex
    cyber-physical systems. In this article, we study the problem of
    designing a scalable constraints solver for an important class of
    constraints named \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Li:2024:CIS,
  author = {Lu Li and Qi Tian and Guofeng Qin and Shuaiyu Chen and
    Weijia Wang},
  title = {Compact Instruction Set Extensions for Dilithium},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {23},
  pages = {23:1--23:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3643826},
  URL = {https://dl.acm.org/doi/10.1145/3643826},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Post-quantum cryptography is considered to provide
    security against both traditional and quantum computer attacks.
    Dilithium is a digital signature algorithm that derives its
    security from the challenge of finding short vectors in lattices.
    It has been \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Wu:2024:SGC,
  author = {Chin-Hsien Wu and Cheng-Tze Lee and Yi-Ren Tsai and
    Cheng-Yen Wu},
  title = {A Space-Grained Cleaning Method to Reduce Long-Tail Latency
    of {DM-SMR} Disks},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {24},
  pages = {24:1--24:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3643827},
  URL = {https://dl.acm.org/doi/10.1145/3643827},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {DM-SMR (device-managed shingled magnetic recording) disks
    allocate a portion of disk space as the persistent cache (PC) to
    address the issue of overlapping tracks during data updates. When
    the PC space becomes insufficient, a space cleaning is triggered
    \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Deng:2024:SST,
  author = {Jianing Deng and Shunjie Dong and Lvcheng Chen and Jingtong
    Hu and Cheng Zhuo},
  title = {{STDF}: Spatio-Temporal Deformable Fusion for Video Quality
    Enhancement on Embedded Platforms},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {25},
  pages = {25:1--25:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3645113},
  URL = {https://dl.acm.org/doi/10.1145/3645113},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {With the development of embedded systems and deep
    learning, it is feasible to combine them for offering various and
    convenient human-centered services, which is based on high-quality
    (HQ) videos. However, due to the limit of video traffic load and
    \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Bhasin:2024:SIP,
  author = {Shivam Bhasin and Fabrizio {De Santis} and Francesco
    Regazzoni},
  title = {Special Issue on Post-Quantum Cryptography for Embedded
    Systems},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {26},
  pages = {26:1--26:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3641852},
  URL = {https://dl.acm.org/doi/10.1145/3641852},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Mujdei:2024:SCA,
  author = {Catinca Mujdei and Lennert Wouters and Angshuman Karmakar
    and Arthur Beckers and Jose Maria Bermudo Mera and Ingrid
    Verbauwhede},
  title = {Side-channel Analysis of Lattice-based Post-quantum
    Cryptography: Exploiting Polynomial Multiplication},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {27},
  pages = {27:1--27:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3569420},
  URL = {https://dl.acm.org/doi/10.1145/3569420},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {Polynomial multiplication algorithms such as Toom--Cook
    and the Number Theoretic Transform are fundamental building blocks
    for lattice-based post-quantum cryptography. In this work we
    present correlation power-analysis-based side-channel analysis
    \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Gupta:2024:MEE,
  author =       "Saransh Gupta and Rosario Cammarota and Tajana
                 {\v{S}}imuni{\'c}",
  title =        "{MemFHE}: End-to-end Computing with Fully Homomorphic
                 Encryption in Memory",
  journal =      j-TECS,
  volume =       "23",
  number =       "2",
  pages =        "28:1--28:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3569955",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Apr 10 08:49:11 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3569955",
  abstract =     "The increasing amount of data and the growing
                 complexity of problems have resulted in an ever-growing
                 reliance on cloud computing. However, many
                 applications, most notably in healthcare, finance, or
                 defense, demand security and privacy, which today's
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Thoma:2024:AAS,
  author = {Jan Philipp Thoma and Darius Hartlief and Tim
    G{\"u}neysu},
  title = {Agile Acceleration of Stateful Hash-based Signatures in
    Hardware},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  ajournal = {ACM Trans. Embed. Comput. Syst.},
  journal-URL = {https://dl.acm.org/loi/tecs},
  volume = {23},
  number = {2},
  articleno = {29},
  pages = {29:1--29:??},
  month = mar,
  year = {2024},
  DOI = {https://doi.org/10.1145/3567426},
  URL = {https://dl.acm.org/doi/10.1145/3567426},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  abstract = {With the development of large-scale quantum computers,
    the current landscape of asymmetric cryptographic algorithms will
    change dramatically. Today's standards like RSA, DSA, and ElGamal
    will no longer provide sufficient security against quantum
    attackers \ldots{}},
  bibdate = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
    https://www.math.utah.edu/pub/tex/bib/hash.bib;
    https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Karl:2024:PQS,
  author          = {Patrick Karl and Jonas Schupp and Tim Fritzmann
                     and Georg Sigl},
  title           = {Post-Quantum Signatures on {RISC-V} with
                     Hardware Acceleration},
  journal         = j-TECS,
  volume          = {23},
  number          = {2},
  pages           = {30:1--30:??},
  month           = mar,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3579092},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/risc-v.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3579092},
  abstract        = {CRYSTALS-Dilithium and Falcon are digital
                     signature algorithms based on cryptographic
                     lattices, which are considered secure even if
                     large-scale quantum computers will be able to
                     break conventional public-key cryptography. Both
                     schemes have been selected \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {30},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

@Article{Elkhatib:2024:CEF,
  author          = {Rami Elkhatib and Brian Koziel and
                     Reza Azarderakhsh and Mehran Mozaffari Kermani},
  title           = {Cryptographic Engineering a Fast and Efficient
                     {SIKE} in {FPGA}},
  journal         = j-TECS,
  volume          = {23},
  number          = {2},
  pages           = {31:1--31:??},
  month           = mar,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3584919},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3584919},
  abstract        = {Recent attacks have shown that SIKE is not
                     secure and should not be used in its current
                     state. However, this work was completed before
                     these attacks were discovered and might be
                     beneficial to other cryptosystems such as
                     SQISign. The primary downside of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {31},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

%%% Singh et al., TECS 23(2), article 32.  In the abstract the datapath
%%% width N is set in math mode ("$N$-bit", "$N$ parallel"); the form
%%% "N -bit" with a stray space is presumably a text-extraction artifact.
@Article{Singh:2024:AEF,
  author =       "Richa Singh and Saad Islam and Berk Sunar and Patrick
                 Schaumont",
  title =        "Analysis of {EM} Fault Injection on Bit-sliced Number
                 Theoretic Transform Software in {Dilithium}",
  journal =      j-TECS,
  volume =       "23",
  number =       "2",
  pages =        "32:1--32:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3583757",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Apr 10 08:49:11 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3583757",
  abstract =     "Bitslicing is a software implementation technique that
                 treats an $N$-bit processor datapath as $N$ parallel
                 single-bit datapaths. Bitslicing is particularly useful
                 to implement data-parallel algorithms, algorithms that
                 apply the same operation sequence to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}

@Article{Jati:2024:CCK,
  author          = {Arpan Jati and Naina Gupta and
                     Anupam Chattopadhyay and Somitra Kumar Sanadhya},
  title           = {A Configurable {CRYSTALS--Kyber} Hardware
                     Implementation with Side-Channel Protection},
  journal         = j-TECS,
  volume          = {23},
  number          = {2},
  pages           = {33:1--33:??},
  month           = mar,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3587037},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3587037},
  abstract        = {In this work, we present a configurable and
                     side channel resistant implementation of the
                     post-quantum key-exchange algorithm
                     CRYSTALS-Kyber. The implemented design can be
                     configured for different performance and area
                     requirements leading to different \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {33},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

@Article{Alnahawi:2024:TNG,
  author          = {Nouri Alnahawi and Nicolai Schmitt and
                     Alexander Wiesmaier and Chiara-Marie Zok},
  title           = {Toward Next Generation Quantum-Safe {eIDs}
                     and {eMRTDs}: a Survey},
  journal         = j-TECS,
  volume          = {23},
  number          = {2},
  pages           = {34:1--34:??},
  month           = mar,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3585517},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Wed Apr 10 08:49:11 MDT 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                     https://www.math.utah.edu/pub/tex/bib/tecs.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3585517},
  abstract        = {Security mechanisms of Electronic Personal
                     Documents (eCards) depend on (asymmetric)
                     cryptography that is and always has been subject
                     to the threat of compromise, be it from
                     conventional attacks or quantum computers. With
                     Post-Quantum Cryptography (PQC), \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Embed. Comput. Syst.},
  articleno       = {34},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {https://dl.acm.org/loi/tecs}
}

%%% Ravi et al., TECS 23(2), article 35.  The title is stored in title
%%% case throughout ("Attacks"); the braces around Kyber and Dilithium
%%% protect those scheme names from style-driven downcasing.
@Article{Ravi:2024:SCF,
  author =       "Prasanna Ravi and Anupam Chattopadhyay and Jan Pieter
                 D'Anvers and Anubhab Baksi",
  title =        "Side-channel and Fault-injection Attacks over
                 Lattice-based Post-quantum Schemes ({Kyber},
                 {Dilithium}): Survey and New Results",
  journal =      j-TECS,
  volume =       "23",
  number =       "2",
  pages =        "35:1--35:??",
  month =        mar,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3603170",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Apr 10 08:49:11 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/tecs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3603170",
  abstract =     "In this work, we present a systematic study of
                 Side-Channel Attacks (SCA) and Fault Injection Attacks
                 (FIA) on structured lattice-based schemes, with main
                 focus on Kyber Key Encapsulation Mechanism (KEM) and
                 Dilithium signature scheme, which are leading
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Embed. Comput. Syst.",
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "https://dl.acm.org/loi/tecs",
}