%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.54",
%%%     date            = "06 October 2014",
%%%     time            = "16:12:41 MDT",
%%%     filename        = "tecs.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "10627 27214 143924 1386287",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "bibliography; BibTeX; ACM Transactions on
%%%                        Embedded Computing Systems (TECS)",
%%%     license         = "public domain",
%%%     supported       = "no",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        the journal ACM Transactions on Embedded
%%%                        Computing Systems (no CODEN, ISSN 1539-9087
%%%                        (print), 1558-3465 (electronic)), for
%%%                        2002--date.
%%%
%%%                        Publication began with volume 1, number 1,
%%%                        in November 2002.  The journal appears
%%%                        quarterly, in February, May, August, and
%%%                        November.
%%%
%%%                        The journal has a World-Wide Web site at:
%%%
%%%                            http://www.acm.org/pubs/tecs
%%%                            http://portal.acm.org/browse_dl.cfm?&idx=J840
%%%
%%%                        Tables-of-contents of all issues are
%%%                        available at:
%%%
%%%                            http://www.acm.org/pubs/contents/journals/tecs/
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        At version 1.54, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2002 (   7)    2007 (  39)    2012 (  89)
%%%                             2003 (  24)    2008 (  47)    2013 ( 152)
%%%                             2004 (  36)    2009 (  39)    2014 (  90)
%%%                             2005 (  39)    2010 (  58)
%%%                             2006 (  30)    2011 (  19)
%%%
%%%                             Article:        669
%%%
%%%                             Total entries:  669
%%%
%%%                        Spelling has been verified with the UNIX
%%%                        spell and GNU ispell programs using the
%%%                        exception dictionary stored in the
%%%                        companion file with extension .sok.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume.''
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility.",
%%%  }
%%% ====================================================================

%%% Preamble text is copied verbatim to the head of the generated
%%% bibliography; here it reads in the file bibnames.sty.
@preamble{{\input bibnames.sty}}

%%% ====================================================================
%%% Acknowledgement abbreviations:

%%% Maintainer contact block; every entry in this file refers to it
%%% through its acknowledgement field.
@string{ack-nhfb =
  "Nelson H. F. Beebe, University of Utah,
   Department of Mathematics, 110 LCB,
   155 S 1400 E RM 233,
   Salt Lake City, UT 84112-0090, USA,
   Tel: +1 801 581 5254, FAX: +1 801 581 4148,
   e-mail: \path|beebe@math.utah.edu|,
   \path|beebe@acm.org|,
   \path|beebe@computer.org| (Internet),
   URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:

%%% Journal name used by the journal field of every entry below.
@string{j-TECS = {ACM Transactions on Embedded Computing Systems}}

%%% ====================================================================
%%% Publisher abbreviations:

%%% Publisher name.
@string{pub-ACM = {ACM Press}}

%%% Publisher address.
@string{pub-ACM:adr = {New York, NY 10036, USA}}

%%% ====================================================================
%%% Bibliography entries:

@article{Wolf:2002:III,
  author          = {Wayne Wolf},
  title           = {Introduction to the inaugural issue},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {1--1},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Jacob:2002:ITS,
  author          = {Bruce Jacob and Shuvra Bhattacharyya},
  title           = {Introduction to the two special issues on memory},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {2--5},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Avissar:2002:OMA,
  author          = {Oren Avissar and Rajeev Barua and Dave Stewart},
  title           = {An optimal memory allocation scheme
                     for scratch-pad-based embedded systems},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {6--26},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Chen:2002:TGC,
  author          = {G. Chen and R. Shetty and M. Kandemir and
                     N. Vijaykrishnan and M. J. Irwin and M. Wolczko},
  title           = {Tuning garbage collection for reducing memory
                     system energy in an embedded {Java} environment},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {27--55},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Lee:2002:AAI,
  author          = {Jung-Hoon Lee and Shin-Dug Kim and Charles Weems},
  title           = {Application-adaptive intelligent cache memory system},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {56--78},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Yang:2002:FVL,
  author          = {Jun Yang and Rajiv Gupta},
  title           = {Frequent value locality and its applications},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {79--105},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Ykman-Couvreur:2002:SLE,
  author          = {Ch. Ykman-Couvreur and J. Lambrecht and
                     A. {Van Der Togt} and F. Catthoor and H. {De Man}},
  title           = {System-level exploration of association table
                     implementations in telecom network applications},
  journal         = j-TECS,
  volume          = {1},
  number          = {1},
  pages           = {106--140},
  month           = nov,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:40 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Jacob:2003:ITS,
  author          = {Bruce Jacob and Shuvra Bhattacharyya},
  title           = {Introduction to the two special issues on memory},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {1--4},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Benini:2003:EAD,
  author          = {Luca Benini and Alberto Macii and Massimo Poncino},
  title           = {Energy-aware design of embedded memories:
                     a survey of technologies, architectures,
                     and optimization techniques},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {5--32},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Grun:2003:APB,
  author          = {Peter Grun and Nikil Dutt and Alex Nicolau},
  title           = {Access pattern-based memory and
                     connectivity architecture exploration},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {33--73},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Qu:2003:SSS,
  author          = {Gang Qu and Miodrag Potkonjak},
  title           = {System synthesis of synchronous
                     multimedia applications},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {74--97},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Shim:2003:LEC,
  author          = {Hojun Shim and Yongsoo Joo and Yongseok Choi and
                     Hyung Gyu Lee and Naehyuck Chang},
  title           = {Low-energy off-chip {SDRAM} memory systems
                     for embedded applications},
  journal         = j-TECS,
  volume          = {2},
  number          = {1},
  pages           = {98--130},
  month           = feb,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% Guest editorial opening the special issue (volume 2, number 2).
%%% NOTE(review): the first guest editor's family name is believed to
%%% be Gao (given name Guang); the name is therefore written in
%%% given-family order so that BibTeX takes Gao as the surname.
%%% Confirm against the publisher's record.  The citation tag keeps
%%% its historical ``Guang'' prefix so that existing citations of
%%% this entry continue to resolve.
@Article{Guang:2003:SIC,
  author =       "Guang Gao and Trevor Mudge",
  title =        "Special issue on compilers, architecture, and
                 synthesis for embedded systems",
  journal =      j-TECS,
  volume =       "2",
  number =       "2",
  pages =        "131--131",
  month =        may,
  year =         "2003",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Aug 7 11:26:41 MDT 2003",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Franke:2003:ARH,
  author          = {Bj{\"o}rn Franke and Michael O'Boyle},
  title           = {Array recovery and high-level transformations
                     for {DSP} applications},
  journal         = j-TECS,
  volume          = {2},
  number          = {2},
  pages           = {132--162},
  month           = may,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Kim:2003:PIC,
  author          = {Soontae Kim and N. Vijaykrishnan and
                     Mahmut Kandemir and Anand Sivasubramaniam and
                     Mary Jane Irwin},
  title           = {Partitioned instruction cache architecture
                     for energy efficiency},
  journal         = j-TECS,
  volume          = {2},
  number          = {2},
  pages           = {163--185},
  month           = may,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Rabbah:2003:DRD,
  author          = {Rodric M. Rabbah and Krishna V. Palem},
  title           = {Data remapping for design space optimization
                     of embedded memory systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {2},
  pages           = {186--218},
  month           = may,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Zhao:2003:SRM,
  author          = {Qin Zhao and Bart Mesman and Twan Basten},
  title           = {Static resource models for code-size
                     efficient embedded processors},
  journal         = j-TECS,
  volume          = {2},
  number          = {2},
  pages           = {219--250},
  month           = may,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:41 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Jacome:2003:SIP,
  author          = {Margarida Jacome and Francky Catthoor},
  title           = {Special issue on power-aware embedded computing},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {251--254},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Martin:2003:CSS,
  author          = {Thomas L. Martin and Daniel P. Siewiorek and
                     Asim Smailagic and Matthew Bosworth and
                     Matthew Ettus and Jolin Warren},
  title           = {A case study of a system-level approach
                     to power-aware computing},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {255--276},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Rakhmatov:2003:EMB,
  author          = {Daler Rakhmatov and Sarma Vrudhula},
  title           = {Energy management for battery-powered
                     embedded systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {277--324},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Irani:2003:OSD,
  author          = {Sandy Irani and Sandeep Shukla and Rajesh Gupta},
  title           = {Online strategies for dynamic power management
                     in systems with multiple power-saving states},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {325--346},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Zhou:2003:AMC,
  author          = {Huiyang Zhou and Mark C. Toburen and
                     Eric Rotenberg and Thomas M. Conte},
  title           = {Adaptive mode control:
                     a static-power-efficient cache design},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {347--372},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Unsal:2003:CCC,
  author          = {Osman S. Unsal and Raksit Ashok and
                     Israel Koren and C. Mani Krishna and
                     Csaba Andras Moritz},
  title           = {{Cool-Cache}: a compiler-enabled energy
                     efficient data caching framework for
                     embedded/multimedia processors},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {373--392},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Yun:2003:EOV,
  author          = {Han-Saem Yun and Jihong Kim},
  title           = {On energy-optimal voltage scheduling
                     for fixed-priority hard real-time systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {393--430},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Schurgers:2003:PME,
  author          = {Curt Schurgers and Vijay Raghunathan and
                     Mani B. Srivastava},
  title           = {Power management for energy-aware
                     communication systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {3},
  pages           = {431--447},
  month           = aug,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Aug 7 11:26:42 MDT 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Gordon-Ross:2003:TIC,
  author          = {Ann Gordon-Ross and Susan Cotterell and Frank Vahid},
  title           = {Tiny instruction caches for low power
                     embedded systems},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {449--481},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Lin:2003:CMC,
  author          = {Kelvin Lin and Chung-Ping Chung and
                     Jean Jyh-Jiun Shann},
  title           = {Compressing {MIPS} code by multiple
                     operand dependencies},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {482--508},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Musoll:2003:SRU,
  author          = {Enric Musoll},
  title           = {Speculating to reduce unnecessary power consumption},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {509--536},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Rusu:2003:MRR,
  author          = {Cosmin Rusu and Rami Melhem and Daniel Moss{\'e}},
  title           = {Maximizing rewards for real-time applications
                     with energy constraints},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {537--559},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Venkataramani:2003:ACC,
  author          = {Girish Venkataramani and Walid Najjar and
                     Fadi Kurdahi and Nader Bagherzadeh and
                     Wim Bohm and Jeff Hammes},
  title           = {Automatic compilation to a coarse-grained
                     reconfigurable system-on-a-chip},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {560--589},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Zhuge:2003:CSR,
  author          = {Qingfeng Zhuge and Bin Xiao and Edwin H.-M. Sha},
  title           = {Code size reduction technique and
                     implementation for software-pipelined
                     {DSP} applications},
  journal         = j-TECS,
  volume          = {2},
  number          = {4},
  pages           = {590--613},
  month           = nov,
  year            = {2003},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Dec 22 17:52:29 MST 2003},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Gupta:2004:GES,
  author          = {Rajesh Gupta},
  title           = {Guest editorial: {Special} issue on
                     networked embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {1--2},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Raghunathan:2004:EEW,
  author          = {Vijay Raghunathan and Saurabh Ganeriwal and
                     Mani Srivastava and Curt Schurgers},
  title           = {Energy efficient wireless packet scheduling
                     and fair queuing},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {3--23},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% Titles in this file are stored downcased, with braces protecting
%%% only the words that must keep their capitals; the subtitle below
%%% follows that convention.
@Article{Bulusu:2004:SCL,
  author =       "Nirupama Bulusu and John Heidemann and Deborah Estrin
                 and Tommy Tran",
  title =        "Self-configuring localization systems: design and
                 experimental evaluation",
  journal =      j-TECS,
  volume =       "3",
  number =       "1",
  pages =        "24--60",
  month =        feb,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Mar 6 07:14:21 MST 2004",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Zou:2004:SDT,
  author          = {Yi Zou and Krishnendu Chakrabarty},
  title           = {Sensor deployment and target localization in
                     distributed sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {61--91},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Gebotys:2004:DSC,
  author          = {Catherine H. Gebotys},
  title           = {Design of secure cryptography against the threat of
                     power-attacks in {DSP}-embedded processors},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {92--113},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Mishra:2004:MVP,
  author          = {Prabhat Mishra and Nikil Dutt},
  title           = {Modeling and validation of pipeline specifications},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {114--139},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Mishra:2004:PMC,
  author          = {Prabhat Mishra and Mahesh Mamidipaka and Nikil Dutt},
  title           = {Processor-memory coexploration using an architecture
                     description language},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {140--162},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Naik:2004:CCS,
  author          = {Mayur Naik and Jens Palsberg},
  title           = {Compiling with code-size constraints},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {163--181},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Schmitz:2004:ISO,
  author          = {Marcus T. Schmitz and Bashir M. Al-Hashimi and Petru
                     Eles},
  title           = {Iterative schedule optimization for voltage scalable
                     distributed embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {182--217},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Stitt:2004:ESS,
  author          = {Greg Stitt and Frank Vahid and Shawn Nematbakhsh},
  title           = {Energy savings and speedups from partitioning critical
                     software loops to hardware in embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {1},
  pages           = {218--232},
  month           = feb,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Mar 6 07:14:21 MST 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Lach:2004:ESI,
  author          = {John Lach and Kia Bazargan},
  title           = {Editorial: {Special} issue on dynamically adaptable
                     embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {233--236},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Ghiasi:2004:OAM,
  author          = {Soheil Ghiasi and Ani Nahapetian and Majid
                     Sarrafzadeh},
  title           = {An optimal algorithm for minimizing run-time
                     reconfiguration delay},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {237--256},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Robertson:2004:DFP,
  author          = {Ian Robertson and James Irvine},
  title           = {A design flow for partially reconfigurable hardware},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {257--283},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Mejia-Alvarez:2004:ASS,
  author          = {Pedro Mejia-Alvarez and Eugene Levner and Daniel
                     Moss{\'e}},
  title           = {Adaptive scheduling server for power-aware real-time
                     tasks},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {284--306},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Zhang:2004:BAP,
  author          = {Fan Zhang and Samuel T. Chanson},
  title           = {Blocking-aware processor voltage scheduling for
                     real-time tasks},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {307--335},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Zhang:2004:DAF,
  author          = {Ying Zhang and Krishnendu Chakrabarty},
  title           = {Dynamic adaptation for fault tolerance and power
                     management in embedded real-time systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {336--360},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Huang:2004:DDR,
  author          = {Zhining Huang and Sharad Malik and Nahri Moreano and
                     Guido Araujo},
  title           = {The design of dynamically reconfigurable datapath
                     coprocessors},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {361--384},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Noguera:2004:MRA,
  author          = {Juanjo Noguera and Rosa M. Badia},
  title           = {Multitasking on reconfigurable architectures:
                     microarchitecture support and dynamic scheduling},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {385--406},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Zhang:2004:STC,
  author          = {Chuanjun Zhang and Frank Vahid and Roman Lysecky},
  title           = {A self-tuning cache architecture for embedded
                     systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {407--425},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{He:2004:AAA,
  author          = {Tian He and Brian M. Blum and John A. Stankovic and
                     Tarek Abdelzaher},
  title           = {{AIDA}: {Adaptive} application-independent data
                     aggregation in wireless sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {2},
  pages           = {426--457},
  month           = may,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Serpanos:2004:GES,
  author          = {Dimitrios N. Serpanos and Haris Lekatsas},
  title           = {Guest editorial: {Special} issue on embedded systems
                     and security},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {459--460},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Ravi:2004:SES,
  author          = {Srivaths Ravi and Anand Raghunathan and Paul Kocher
                     and Sunil Hattangady},
  title           = {Security in embedded systems: {Design} challenges},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {461--491},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Coron:2004:SSL,
  author =       "Jean-S{\'e}bastien Coron and David Naccache and Paul
                 Kocher",
  title =        "Statistics and secret leakage",
  journal =      j-TECS,
  volume =       "3",
  number =       "3",
  pages =        "492--508",
  month =        aug,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:47 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Wollinger:2004:EHC,
  author          = {Thomas Wollinger and Jan Pelzl and Volker
                     Wittelsberger and Christof Paar and G{\"o}kay Saldamli
                     and {\c{C}}etin K. Ko{\c{c}}},
  title           = {Elliptic and hyperelliptic curves on embedded
                     {$\mu$P}},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {509--533},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Wollinger:2004:SFS,
  author          = {Thomas Wollinger and Jorge Guajardo and Christof
                     Paar},
  title           = {Security on {FPGAs}: {State-of-the-art}
                     implementations and attacks},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {534--574},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Reyhani-Masoleh:2004:EDS,
  author          = {Arash Reyhani-Masoleh and M. Anwar Hasan},
  title           = {Efficient digit-serial normal basis multipliers over
                     binary extension fields},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {575--592},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Reyhani-Masoleh:2004:TFT,
  author          = {Arash Reyhani-Masoleh and M. Anwar Hasan},
  title           = {Towards fault-tolerant cryptographic computations over
                     finite fields},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {593--613},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Liu:2004:FSM,
  author          = {Rong-Tai Liu and Nen-Fu Huang and Chih-Hao Chen and
                     Chia-Nan Kao},
  title           = {A fast string-matching algorithm for network
                     processor-based intrusion detection system},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {614--633},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Park:2004:LLS,
  author          = {Taejoon Park and Kang G. Shin},
  title           = {{LiSP}: a lightweight security protocol for wireless
                     sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {3},
  pages           = {634--660},
  month           = aug,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:47 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Harkin:2004:MOR,
  author =       "Jim Harkin and T. Martin McGinnity and Liam P.
                 Maguire",
  title =        "Modeling and optimizing run-time reconfiguration using
                 evolutionary computation",
  journal =      j-TECS,
  volume =       "3",
  number =       "4",
  pages =        "661--685",
  month =        nov,
  year =         "2004",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Oct 29 06:35:48 MDT 2004",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Im:2004:DVS,
  author          = {Chaeseok Im and Soonhoi Ha and Huiseok Kim},
  title           = {Dynamic voltage scheduling with buffers in low-power
                     multimedia applications},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {686--705},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Manolache:2004:SAA,
  author          = {Sorin Manolache and Petru Eles and Zebo Peng},
  title           = {Schedulability analysis of applications with
                     stochastic task execution times},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {706--735},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Serpanos:2004:EHS,
  author          = {Dimitrios N. Serpanos and Poluxeni Mountrouidou and
                     Maria Gamvrili},
  title           = {Evaluation of hardware and software schedulers for
                     embedded switches},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {736--759},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Lanotte:2004:IFH,
  author          = {Ruggero Lanotte and Andrea Maggiolo-Schettini and
                     Simone Tini},
  title           = {Information flow in hybrid systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {760--799},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Liu:2004:MBA,
  author          = {Donggang Liu and Peng Ning},
  title           = {Multilevel {$\mu$TESLA}: {Broadcast} authentication
                     for distributed sensor networks},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {800--836},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Chang:2004:RTG,
  author          = {Li-Pin Chang and Tei-Wei Kuo and Shi-Wu Lo},
  title           = {Real-time garbage collection for flash-memory storage
                     systems of real-time embedded systems},
  journal         = j-TECS,
  volume          = {3},
  number          = {4},
  pages           = {837--863},
  month           = nov,
  year            = {2004},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Oct 29 06:35:48 MDT 2004},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Mueller:2005:ISI,
  author          = {Frank Mueller and Per Stenstr{\"o}m},
  title           = {Introduction to the special issue},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {1--2},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Krishnaswamy:2005:DCB,
  author          = {Arvind Krishnaswamy and Rajiv Gupta},
  title           = {Dynamic coalescing for 16-bit instructions},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {3--37},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Corliss:2005:IED,
  author          = {Marc L. Corliss and E. Christopher Lewis and Amir
                     Roth},
  title           = {The implementation and evaluation of dynamic code
                     decompression using {DISE}},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {38--72},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Dhurjati:2005:MSG,
  author          = {Dinakar Dhurjati and Sumant Kowshik and Vikram Adve
                     and Chris Lattner},
  title           = {Memory safety without garbage collection for embedded
                     applications},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {73--111},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Pop:2005:SDF,
  author          = {Paul Pop and Petru Eles and Zebo Peng},
  title           = {Schedulability-driven frame packing for multicluster
                     distributed embedded systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {112--140},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Swaminathan:2005:PBE,
  author          = {Vishnu Swaminathan and Krishnendu Chakrabarty},
  title           = {Pruning-based, energy-optimal, deterministic {I/O}
                     device scheduling for hard real-time systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {141--167},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Chiou:2005:SAS,
  author          = {Lih-yih Chiou and Swarup Bhunia and Kaushik Roy},
  title           = {Synthesis of application-specific highly efficient
                     multi-mode cores for embedded systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {168--188},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Zambreno:2005:SOA,
  author          = {Joseph Zambreno and Alok Choudhary and Rahul Simha and
                     Bhagi Narahari and Nasir Memon},
  title           = {{SAFE-OPS}: an approach to embedded software
                     security},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {189--210},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Kwon:2005:OVA,
  author          = {Woo-Cheol Kwon and Taewhan Kim},
  title           = {Optimal voltage allocation techniques for
                     dynamically variable voltage processors},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {211--230},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Tan:2005:EME,
  author          = {T. K. Tan and A. Raghunathan and N. K. Jha},
  title           = {Energy macromodeling of embedded operating systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {1},
  pages           = {231--254},
  month           = feb,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Mar 24 15:48:07 MST 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Shukla:2005:GES,
  author          = {Sandeep K. Shukla and Jean-Pierre Talpin},
  title           = {Guest editorial: {Special} issue on models
                     and methodologies for co-design of embedded systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {225--227},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Cachera:2005:VSP,
  author          = {David Cachera and Katell Morin-Allory},
  title           = {Verification of safety properties for
                     parameterized regular systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {228--266},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Chouali:2005:PPM,
  author          = {S. Chouali and J. Julliand and P.-A. Masson and
                     F. Bellegarde},
  title           = {{PLTL}-partitioned model checking for reactive
                     systems under fairness assumptions},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {267--301},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Gardner:2005:CCS,
  author          = {William B. Gardner},
  title           = {Converging {CSP} specifications and {C++}
                     programming via selective formalism},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {302--330},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Ziller:2005:CSS,
  author          = {Roberto Ziller and Klaus Schneider},
  title           = {Combining supervisor synthesis and model checking},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {331--362},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Zhang:2005:HCC,
  author          = {Chuanjun Zhang and Frank Vahid and Walid Najjar},
  title           = {A highly configurable cache for low energy
                     embedded systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {363--387},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Kadayif:2005:DSO,
  author          = {I. Kadayif and M. Kandemir},
  title           = {Data space-oriented tiling for enhancing locality},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {388--414},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Muresan:2005:ICM,
  author          = {Radu Muresan and Catherine Gebotys},
  title           = {Instantaneous current modeling in a complex
                     {VLIW} processor core},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {415--451},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Petrov:2005:RCF,
  author          = {Peter Petrov and Alex Orailoglu},
  title           = {A reprogrammable customization framework for
                     efficient branch resolution in embedded processors},
  journal         = j-TECS,
  volume          = {4},
  number          = {2},
  pages           = {452--468},
  month           = may,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Tue Jun 21 16:50:36 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Burns:2005:E,
  author          = {Alan Burns},
  title           = {Editorial},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {469--471},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Sangiovanni-Vincentelli:2005:OES,
  author          = {Alberto L. Sangiovanni-Vincentelli and
                     Alessandro Pinto},
  title           = {An overview of embedded system design education
                     at {Berkeley}},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {472--499},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Koopman:2005:UES,
  author          = {Philip Koopman and Howie Choset and Rajeev Gandhi
                     and Bruce Krogh and Diana Marculescu and
                     Priya Narasimhan and Joann M. Paul and
                     Ragunathan Rajkumar and Daniel Siewiorek and
                     Asim Smailagic and Peter Steenkiste and
                     Donald E. Thomas and Chenxi Wang},
  title           = {Undergraduate embedded system education at
                     {Carnegie Mellon}},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {500--528},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Verbauwhede:2005:SES,
  author          = {Ingrid Verbauwhede and Patrick Schaumont},
  title           = {Skiing the embedded systems mountain},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {529--548},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Sztipanovits:2005:IES,
  author          = {Janos Sztipanovits and Gautam Biswas and
                     Ken Frampton and Aniruddha Gokhale and
                     Larry Howard and Gabor Karsai and T. John Koo and
                     Xenofon Koutsoukos and Douglas C. Schmidt},
  title           = {Introducing embedded software and systems
                     education and advanced learning technology in an
                     engineering curriculum},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {549--568},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Seviora:2005:CES,
  author          = {Rudolph E. Seviora},
  title           = {A curriculum for embedded system engineering},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {569--586},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Caspi:2005:GGC,
  author =       "P. Caspi and A. Sangiovanni-Vincentelli and L. Almeida
                 and A. Benveniste and B. Bouyssounouse and G. Buttazzo
                 and I. Crnkovic and W. Damm and J. Engblom and
                 G. Folher and M. Garcia-Valls and H. Kopetz and
                 Y. Lakhnech and F. Laroussinie and L. Lavagno and
                 G. Lipari and F. Maraninchi and Ph. Peti and J. de la
                 Puente and N. Scaife and J. Sifakis and R. de Simone
                 and M. T{\"o}rngren and P. Ver{\'\i}ssimo and
                 A. J. Wellings and R. Wilhelm and T. Willemse and W. Yi",
  title =        "Guidelines for a graduate curriculum on embedded
                 software and systems",
  journal =      j-TECS,
  volume =       "4",
  number =       "3",
  pages =        "587--611",
  month =        aug,
  year =         "2005",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Sat Sep 17 15:05:12 MDT 2005",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Huang:2005:ESC,
  author          = {Tai-Yi Huang and Chung-Ta King and
                     Youn-Long Steve Lin and Yin-Tsung Hwang},
  title           = {The embedded software consortium of {Taiwan}},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {612--632},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Grimheden:2005:WES,
  author          = {Martin Grimheden and Martin T{\"o}rngren},
  title           = {What is embedded systems and how should it
                     be taught?---results from a didactic analysis},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {633--651},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Zhang:2005:RDC,
  author          = {Wei Zhang and Mahmut Kandemir and Mustafa Karakoy
                     and Guangyu Chen},
  title           = {Reducing data cache leakage energy using
                     a compiler-based approach},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {652--678},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Kim:2005:DDC,
  author          = {Hyung Seok Kim and Tarek F. Abdelzaher and
                     Wook Hyun Kwon},
  title           = {Dynamic delay-constrained minimum-energy
                     dissemination in wireless sensor networks},
  journal         = j-TECS,
  volume          = {4},
  number          = {3},
  pages           = {679--706},
  month           = aug,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Sat Sep 17 15:05:12 MDT 2005},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Alur:2005:P,
  author          = {Rajeev Alur and Insup Lee},
  title           = {Preface},
  journal         = j-TECS,
  volume          = {4},
  number          = {4},
  pages           = {707--707},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Feb 16 10:59:18 MST 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Tardieu:2005:LE,
  author =       "Olivier Tardieu and Robert de Simone",
  title =        "Loops in {ESTEREL}",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "708--750",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1113830.1113832",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Regehr:2005:ESO,
  author          = {John Regehr and Alastair Reid and Kirk Webb},
  title           = {Eliminating stack overflow by abstract interpretation},
  journal         = j-TECS,
  volume          = {4},
  number          = {4},
  pages           = {751--778},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Feb 16 10:59:18 MST 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Tripakis:2005:TDT,
  author =       "Stavros Tripakis and Christos Sofronis and Paul Caspi
                 and Adrian Curic",
  title =        "Translating discrete-time {Simulink} to {Lustre}",
  journal =      j-TECS,
  volume =       "4",
  number =       "4",
  pages =        "779--818",
  month =        nov,
  year =         "2005",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1113830.1113834",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 16 10:59:18 MST 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Kadayif:2005:CDH,
  author          = {I. Kadayif and M. Kandemir and G. Chen and
                     N. Vijaykrishnan and M. J. Irwin and
                     A. Sivasubramaniam},
  title           = {Compiler-directed high-level energy estimation
                     and optimization},
  journal         = j-TECS,
  volume          = {4},
  number          = {4},
  pages           = {819--850},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Feb 16 10:59:18 MST 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Hu:2005:ADR,
  author          = {J. Hu and M. Kandemir and N. Vijaykrishnan and
                     M. J. Irwin},
  title           = {Analyzing data reuse for cache reconfiguration},
  journal         = j-TECS,
  volume          = {4},
  number          = {4},
  pages           = {851--876},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Feb 16 10:59:18 MST 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{He:2005:RFL,
  author          = {Tian He and Chengdu Huang and Brian M. Blum and
                     John A. Stankovic and Tarek F. Abdelzaher},
  title           = {Range-free localization and its impact on
                     large scale sensor networks},
  journal         = j-TECS,
  volume          = {4},
  number          = {4},
  pages           = {877--906},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Feb 16 10:59:18 MST 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Gaujal:2005:SPA,
  author          = {Bruno Gaujal and Nicolas Navet and Cormac Walsh},
  title           = {Shortest-path algorithms for real-time scheduling
                     of {FIFO} tasks with minimal energy use},
  journal         = j-TECS,
  volume          = {4},
  number          = {4},
  pages           = {907--933},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Feb 16 10:59:18 MST 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Bartolini:2005:OIC,
  author          = {S. Bartolini and C. A. Prete},
  title           = {Optimizing instruction cache performance of
                     embedded systems},
  journal         = j-TECS,
  volume          = {4},
  number          = {4},
  pages           = {934--965},
  month           = nov,
  year            = {2005},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Feb 16 10:59:18 MST 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Zhang:2006:RDL,
  author          = {W. Zhang and Y.-F. Tsai and D. Duarte and
                     N. Vijaykrishnan and M. Kandemir and M. J. Irwin},
  title           = {Reducing dynamic and leakage energy in
                     {VLIW} architectures},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {1--28},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Coussy:2006:FMH,
  author          = {Philippe Coussy and Emmanuel Casseau and
                     Pierre Bomel and Adel Baganne and Eric Martin},
  title           = {A formal method for hardware {IP} design
                     and integration under {I/O} and timing constraints},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {29--53},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Varea:2006:DFN,
  author =       "Mauricio Varea and Bashir M. Al-Hashimi and Luis A.
                 Cort{\'e}s and Petru Eles and Zebo Peng",
  title =        "{Dual Flow Nets}: {Modeling} the control\slash
                 data-flow relation in embedded systems",
  journal =      j-TECS,
  volume =       "5",
  number =       "1",
  pages =        "54--81",
  month =        feb,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu May 18 08:17:05 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{AbouGhazaleh:2006:COS,
  author          = {Nevine AbouGhazaleh and Daniel Moss{\'e} and
                     Bruce R. Childers and Rami Melhem},
  title           = {Collaborative operating system and compiler
                     power management for real-time applications},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {82--115},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Dean:2006:STI,
  author          = {Alexander G. Dean},
  title           = {Software thread integration for embedded
                     system display applications},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {116--151},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Alur:2006:PAR,
  author          = {Rajeev Alur and Thao Dang and
                     Franjo Ivan{\v{c}}i{\'c}},
  title           = {Predicate abstraction for reachability analysis
                     of hybrid systems},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {152--199},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Seth:2006:FFA,
  author          = {Kiran Seth and Aravindh Anantaraman and
                     Frank Mueller and Eric Rotenberg},
  title           = {{FAST}: {Frequency-Aware Static Timing} analysis},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {200--224},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Chen:2006:RCS,
  author          = {G. Chen and M. Kandemir and M. J. Irwin and
                     J. Ramanujam},
  title           = {Reducing code size through address
                     register assignment},
  journal         = j-TECS,
  volume          = {5},
  number          = {1},
  pages           = {225--258},
  month           = feb,
  year            = {2006},
  CODEN           = {????},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu May 18 08:17:05 MDT 2006},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Jerraya:2006:GEC,
  author =       "Ahmed Jerraya and Trevor Mudge",
  title =        "Guest editorial: {Concurrent} hardware and software
                 design for multiprocessor {SoC}",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "259--262",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Xu:2006:DMA,
  author =       "Jiang Xu and Wayne Wolf and Joerg Henkel and Srimat
                 Chakradhar",
  title =        "A design methodology for application-specific
                 networks-on-chip",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "263--280",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Kangas:2006:UBM,
  author =       "Tero Kangas and Petri Kukkala and Heikki Orsila and
                 Erno Salminen and Marko H{\"a}nnik{\"a}inen and Timo
                 D. H{\"a}m{\"a}l{\"a}inen and Jouni Riihim{\"a}ki and
                 Kimmo Kuusilinna",
  title =        "{UML}-based multiprocessor {SoC} design framework",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "281--320",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Hua:2006:EEE,
  author =       "Shaoxiong Hua and Gang Qu and Shuvra S.
                 Bhattacharyya",
  title =        "Energy-efficient embedded software implementation on
                 multiprocessor system-on-chip with multiple voltages",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "321--341",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Hessel:2006:SRA,
  author =       "Fabiano Hessel and Vitor M. {Da Rosa} and Carlos
                 Eduardo Reif and C{\'e}sar Marcon and Tatiana {Gadelha
                 Serra Dos Santos}",
  title =        "Scheduling refinement in abstract {RTOS} models",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "342--354",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Ou:2006:DSE,
  author =       "Jingzhao Ou and Viktor K. Prasanna",
  title =        "Design space exploration using arithmetic-level
                 hardware--software cosimulation for configurable
                 multiprocessor platforms",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "355--382",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Loghi:2006:CCT,
  author =       "Mirko Loghi and Massimo Poncino and Luca Benini",
  title =        "Cache coherence tradeoffs in shared-memory {MPSoCs}",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "383--407",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Lapalme:2006:NEE,
  author =       "James Lapalme and El Mostapha Aboulhamid and Gabriela
                 Nicolescu",
  title =        "A new efficient {EDA} tool design methodology",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "408--430",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Reshadi:2006:RFI,
  author =       "Mehrdad Reshadi and Nikil Dutt and Prabhat Mishra",
  title =        "A retargetable framework for instruction-set
                 architecture simulation",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "431--452",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Memik:2006:ENP,
  author =       "Gokhan Memik and William H. Mangione-Smith",
  title =        "Evaluating {Network Processors} using {NetBench}",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "453--471",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Udayakumaran:2006:DAS,
  author =       "Sumesh Udayakumaran and Angel Dominguez and Rajeev
                 Barua",
  title =        "Dynamic allocation for scratch-pad memory using
                 compile-time decisions",
  journal =      j-TECS,
  volume =       "5",
  number =       "2",
  pages =        "472--511",
  month =        may,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Aug 23 05:26:43 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Wu:2006:EEU,
  author =       "Haisang Wu and Binoy Ravindran and E. Douglas Jensen
                 and Peng Li",
  title =        "Energy-efficient, utility accrual scheduling under
                 resource constraints for mobile embedded systems",
  journal =      j-TECS,
  volume =       "5",
  number =       "3",
  pages =        "513--542",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165780.1165781",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Oct 11 06:45:18 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present an energy-efficient, utility accrual,
                 real-time scheduling algorithm called ReUA. ReUA
                 considers an application model where activities are
                 subject to time/utility function time constraints,
                 mutual exclusion constraints on shared non-CPU
                 resources, and statistical performance requirements on
                 individual activity timeliness behavior. The algorithm
                 targets mobile embedded systems where {\em
                 system-level\/} energy consumption is also a major
                 concern. For such a model, we consider the scheduling
                 objectives of (1) satisfying the statistical
                 performance requirements and (2) maximizing the
                 system-level energy efficiency, while respecting
                 resource constraints. Since the problem is NP-hard,
                 ReUA allocates CPU cycles using statistical properties
                 of application cycle demands, and heuristically
                 computes schedules with a polynomial time cost. We
                 analytically establish several timeliness and
                 nontimeliness properties of the algorithm. Further, our
                 simulation experiments illustrate ReUA's effectiveness
                 and superiority.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Luo:2006:EEI,
  author =       "Liqian Luo and Tarek F. Abdelzaher and Tian He and
                 John A. Stankovic",
  title =        "{EnviroSuite}: an environmentally immersive
                 programming framework for sensor networks",
  journal =      j-TECS,
  volume =       "5",
  number =       "3",
  pages =        "543--576",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165780.1165782",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Oct 11 06:45:18 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Sensor networks open a new frontier for
                 embedded-distributed computing. Paradigms for sensor
                 network programming-in-the-large have been identified
                 as a significant challenge toward developing
                 large-scale applications. Classical programming
                 languages are too low-level. This paper presents the
                 design, implementation, and evaluation of EnviroSuite,
                 a programming framework that introduces a new paradigm,
                 called environmentally immersive programming, to
                 abstract distributed interactions with the environment.
                 Environmentally immersive programming refers to an
                 object-based programming model in which individual
                 objects represent physical elements in the external
                 environment. It allows the programmer to think directly
                 in terms of environmental abstractions. EnviroSuite
                 provides language primitives for environmentally
                 immersive programming that map transparently into a
                 support library of distributed algorithms for tracking
                 and environmental monitoring. We show how nesC code of
                 realistic applications is significantly simplified
                 using EnviroSuite and demonstrate the resulting system
                 performance on Mica2 and XSM platforms.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Gebotys:2006:SMC,
  author =       "Catherine H. Gebotys",
  title =        "A split-mask countermeasure for low-energy secure
                 embedded systems",
  journal =      j-TECS,
  volume =       "5",
  number =       "3",
  pages =        "577--612",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165780.1165783",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Oct 11 06:45:18 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Future wireless embedded devices will be increasingly
                 powerful, supporting many more applications, including
                 one of the most crucial---security. Although many
                 embedded devices offer more resistance to bus-probing
                 attacks because of their compact size, susceptibility
                 to power or electromagnetic analysis attacks must be
                 analyzed. This paper presents a new split-mask
                 countermeasure to thwart low-order differential power
                 analysis (DPA) and differential EM analysis (DEMA). For
                 the first time, real-power and EM measurements are used
                 to analyze the difficulty of launching new third-order
                 DPA and DEMA attacks on a popular low-energy 32-bit
                 embedded ARM processor. Results show that the new
                 split-mask countermeasure provides increased security
                 without large overheads of energy dissipation, compared
                 to previous research. With the emergence of security
                 applications in PDAs, cell phones, and other embedded
                 devices, low-energy countermeasures for resistance to
                 low-order DPA/DEMA is crucial for supporting future
                 enabled wireless internet.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Zhuang:2006:PLS,
  author =       "Xiaotong Zhuang and Santosh Pande",
  title =        "Parallelizing load\slash stores on dual-bank memory
                 embedded processors",
  journal =      j-TECS,
  volume =       "5",
  number =       "3",
  pages =        "613--657",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165780.1165784",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Oct 11 06:45:18 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many modern embedded processors such as DSPs support
                 partitioned memory banks (also called X--Y memory or
                 dual-bank memory) along with parallel load/store
                 instructions to achieve higher code density and
                 performance. In order to effectively utilize the
                 parallel load/store instructions, the compiler must
                 partition the memory-resident values and assign them to
                 X or Y bank. This paper gives a postregister allocation
                 solution to merge the generated load/store instructions
                 into their parallel counterparts. Simultaneously, our
                 framework performs allocation of values to X or Y
                 memory banks. We first remove as many load/stores and
                 register--register moves as possible through an
                 excellent iterated coalescing based register allocator
                 by Appel and George [1996]. We then attempt to
                 parallelize the generated load/stores using a multipass
                 approach. The basic phase of our approach attempts the
                 merger of load/stores without duplication and web
                 splitting. We model this problem as a graph-coloring
                 problem in which each value is colored as either X or
                 Y. We then construct a motion scheduling graph (MSG),
                 based on the range of motion for each load/store
                 instruction. MSG reflects potential instructions that
                 could be merged. We propose a notion of pseudofixed
                 boundaries so that the load/store movement is less
                 affected by register dependencies. We prove that the
                 coloring problem for MSG is NP-complete and solve it
                 with two different heuristic algorithms with different
                 complexity. We then propose a two-level iterative
                 process to attempt instruction duplication, variable
                 duplication, web splitting, and local conflict
                 elimination to effectively merge the remaining
                 load/stores. Finally, we clean up some multiple-aliased
                 load/stores. To improve the performance, we combine
                 profiling information with each stage coupled with some
                 modifications to the algorithm. We show that our
                 framework results in parallelization of a large number
                 of load/stores without much growth in data and code
                 segments. The average speedup for our optimization pass
                 reaches roughly 13\% if no profile information is
                 available and 17\% with profile information. The
                 average code and data segment growth is controlled
                 within 13\%.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Jones:2006:RPW,
  author =       "Alex K. Jones and Raymond Hoare and Dara Kusic and
                 Gayatri Mehta and Josh Fazekas and John Foster",
  title =        "Reducing power while increasing performance with
                 {SuperCISC}",
  journal =      j-TECS,
  volume =       "5",
  number =       "3",
  pages =        "658--686",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165780.1165785",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Oct 11 06:45:18 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multiprocessor Systems on Chips (MPSoCs) have become a
                 popular architectural technique to increase
                 performance. However, MPSoCs may lead to undesirable
                 power consumption characteristics for computing systems
                 that have strict power budgets, such as PDAs, mobile
                 phones, and notebook computers. This paper presents the
                 super-complex instruction-set computing (SuperCISC)
                 Embedded Processor Architecture and, in particular,
                 investigates performance and power consumption of this
                 device compared to traditional processor
                 architecture-based execution. SuperCISC is a
                 heterogeneous, multicore processor architecture
                 designed to exceed performance of traditional embedded
                 processors while maintaining a reduced power budget
                 compared to low-power embedded processors. At the heart
                 of the SuperCISC processor is a multicore VLIW (Very
                 Large Instruction Word) containing several homogeneous
                 execution cores/functional units. In addition, complex
                 and heterogeneous combinational hardware function cores
                 are tightly integrated to the core VLIW engine
                 providing an opportunity for improved performance and
                 reduced energy consumption. Our SuperCISC processor
                 core has been synthesized for both a 90-nm Stratix II
                 Field Programmable Gate Array (FPGA) and a 160-nm
                 standard cell Application-Specific Integrated Circuit
                 (ASIC) fabrication process from OKI, each operating at
                 approximately 167 MHz for the VLIW core. We examine
                 several reasons for speedup and power improvement
                 through the SuperCISC architecture, including
                 predicated control flow, cycle compression, and a
                 reduction in arithmetic power consumption, which we
                 call power compression. Finally, testing our SuperCISC
                 processor with multimedia and signal-processing
                 benchmarks, we show how the SuperCISC processor can
                 provide performance improvements ranging from 7X to
                 160X with an average of 60X, while also providing
                 orders of magnitude of power improvements for the
                 computational kernels. The power improvements for our
                 benchmark kernels range from just over 40X to over
                 400X, with an average savings exceeding 130X. By
                 combining these power and performance improvements, our
                 total energy improvements all exceed 1000X. As these
                 savings are limited to the computational kernels of the
                 applications, which often consume approximately 90\% of
                 the execution time, we expect our savings to approach
                 the ideal application improvement of 10X.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Girault:2006:ARD,
  author =       "Alain Girault and Xavier Nicollin and Marc Pouzet",
  title =        "Automatic rate desynchronization of embedded reactive
                 programs",
  journal =      j-TECS,
  volume =       "5",
  number =       "3",
  pages =        "687--717",
  month =        aug,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1165780.1165786",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Oct 11 06:45:18 MDT 2006",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many embedded reactive programs perform computations
                 at different rates, while still requiring the overall
                 application to satisfy very tight temporal constraints.
                 We propose a method to automatically distribute
                 programs such that the obtained parts can be run at
                 different rates, which we call rate desynchronization.
                 We consider general programs whose control structure is
                 a finite state automaton and with a DAG of actions in
                 each state. The motivation is to take into account
                 long-duration tasks inside the programs: these are
                 tasks whose execution time is long compared to the
                 other computations in the application, and whose
                 maximal execution rate is known and bounded. Merely
                 scheduling such a long duration task at a slow rate
                 would not work since the whole program would be slowed
                 down if compiled into sequential code. It would thus be
                 impossible to meet the temporal constraints, unless
                 such long duration tasks could be desynchronized from
                 the remaining computations. This is precisely what our
                 method achieves: it distributes the initial program
                 into several parts, so that the parts performing the
                 slow computations can be run at an appropriate rate,
                 therefore not impairing the global reaction time of the
                 program. We present in detail our method, all the
                 involved algorithms, and a small running example. We
                 also compare our method with the related work.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Biswas:2006:MOP,
  author =       "Surupa Biswas and Thomas Carley and Matthew Simpson
                 and Bhuvan Middha and Rajeev Barua",
  title =        "Memory overflow protection for embedded systems using
                 run-time checks, reuse, and compression",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "719--752",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1196636.1196637",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems usually lack virtual memory and are
                 vulnerable to memory overflow since they lack a
                 mechanism to detect overflow or use swap space
                 thereafter. We present a method to detect memory
                 overflows using compiler-inserted software run-time
                 checks. Its overheads in run-time and energy are 1.35
                 and 1.12\%, respectively. Detection of overflow allows
                 system-specific remedial action. We also present
                 techniques to grow the stack or heap segment after they
                 overflow, into previously unutilized space, such as
                 dead variables, free holes in the heap, and space freed
                 by compressing live variables. These may avoid the
                 out-of-memory error if the space recovered is enough to
                 complete execution. The reuse methods are able to grow
                 the stack or heap beyond its overflow by an amount that
                 varies widely by application---the amount of recovered
                 space ranges from 0.7 to 93.5\% of the combined stack
                 and heap size.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "data compression; heap overflow; out-of-memory errors;
                 reliability; reuse; run-time checks; stack overflow",
}

@Article{Higuera-Toledano:2006:HSD,
  author =       "M. Teresa Higuera-Toledano",
  title =        "Hardware support for detecting illegal references in a
                 multiapplication real-time {Java} environment",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "753--772",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1196636.1196638",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Our objective is to adapt the Java memory management
                 to an embedded system, e.g., a wireless PDA executing
                 concurrent multimedia applications within a single JVM.
                 This paper provides software, and hardware-based
                 solutions detecting both illegal references across the
                 application memory spaces and dangling pointers within
                 an application space. We give an approach to
                 divide/share the memory among the applications
                 executing concurrently in the system. We introduce and
                 define application-specific memory, building upon the
                 real-time specification for Java (RTSJ) from the
                 real-time Java expert group. The memory model used in
                 RTSJ imposes strict rules for assignment between memory
                 areas, preventing the creation of dangling pointers,
                 and thus maintaining the pointer safety of Java. Our
                 implementation solution to ensure the checking of these
                 rules before each assignment inserts write barriers
                 that use a stack-based algorithm. This solution
                 adversely affects both the performance and
                 predictability of the RTSJ applications, which can be
                 improved by using an existing hardware support.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "garbage collection; memory management; write
                 barriers",
}

@Article{Winter:2006:TPC,
  author =       "Victor L. Winter and Jason Beranek and Fares Fraij and
                 Steve Roach and Greg Wickstrom",
  title =        "A transformational perspective into the core of an
                 abstract class loader for the {SSP}",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "773--818",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1196636.1196639",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The SSP is a hardware implementation of a subset of
                 the JVM for use in high-consequence embedded
                 applications. In this context, a majority of the
                 activities belonging to class loading, as it is defined
                 in the specification of the JVM, can be performed
                 statically. Static class loading has the net result of
                 dramatically simplifying the design of the SSP, as well
                 as increasing its performance. Because of the high
                 consequence nature of its applications, strong evidence
                 must be provided that all aspects of the SSP have been
                 implemented correctly. This includes the class loader.
                 This article explores the possibility of formally
                 verifying a class loader for the SSP implemented in the
                 strategic programming language TL. Specifically, an
                 implementation of the core activities of an abstract
                 class loader is presented and its verification in ACL2
                 is considered.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "HATS; higher-order rewriting; SSP; strategic
                 programming; TL",
}

%%% TECS 5(4), November 2006, pp. 819--863: the VISTA interactive
%%% code-improvement system.  This entry uses a plain page range and
%%% has no articleno field.  j-TECS and ack-nhfb are unquoted, so they
%%% are string-macro references (their definitions are not in view
%%% here).  CODEN is "????" because, per the file header, the journal
%%% has no CODEN.
@Article{Kulkarni:2006:VVI,
  author =       "Prasad Kulkarni and Wankang Zhao and Stephen Hines and
                 David Whalley and Xin Yuan and Robert van Engelen and
                 Kyle Gallivan and Jason Hiser and Jack Davidson and
                 Baosheng Cai and Mark Bailey and Hwashin Moon and
                 Kyunghwan Cho and Yunheung Paek",
  title =        "{VISTA}: {VPO} interactive system for tuning
                 applications",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "819--863",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1196636.1196640",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Software designers face many challenges when
                 developing applications for embedded systems. One major
                 challenge is meeting the conflicting constraints of
                 speed, code size, and power consumption. Embedded
                 application developers often resort to hand-coded
                 assembly language to meet these constraints since
                 traditional optimizing compiler technology is usually
                 of little help in addressing this challenge. The
                 results are software systems that are not portable,
                 less robust, and more costly to develop and maintain.
                 Another limitation is that compilers traditionally
                 apply the optimizations to a program in a fixed order.
                 However, it has long been known that a single ordering
                 of optimization phases will not produce the best code
                 for every application. In fact, the smallest unit of
                 compilation in most compilers is typically a function
                 and the programmer has no control over the code
                 improvement process other than setting flags to enable
                 or disable certain optimization phases. This paper
                 describes a new code improvement paradigm implemented
                 in a system called VISTA that can help achieve the
                 cost/performance trade-offs that embedded applications
                 demand. The VISTA system opens the code improvement
                 process and gives the application programmer, when
                 necessary, the ability to finely control it. VISTA also
                 provides support for finding effective sequences of
                 optimization phases. This support includes the ability
                 to interactively get static and dynamic performance
                 information, which can be used by the developer to
                 steer the code improvement process. This performance
                 information is also internally used by VISTA for
                 automatically selecting the best optimization sequence
                 from several attempted. One such feature is the use of
                 a genetic algorithm to search for the most efficient
                 sequence based on specified fitness criteria. We
                 include a number of experimental results that evaluate
                 the effectiveness of using a genetic algorithm in VISTA
                 to find effective optimization phase sequences.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "genetic algorithms; interactive compilation; phase
                 ordering; user-directed code improvement",
}

%%% TECS 5(4), November 2006, pp. 864--883: offset assignment (SOA and
%%% GOA) combined with variable coalescing.  Plain page range, no
%%% articleno field.  The DOI field holds a full http://doi.acm.org/
%%% resolver URL rather than a bare DOI.
@Article{Ottoni:2006:OAU,
  author =       "Desiree Ottoni and Guilherme Ottoni and Guido Araujo
                 and Rainer Leupers",
  title =        "Offset assignment using simultaneous variable
                 coalescing",
  journal =      j-TECS,
  volume =       "5",
  number =       "4",
  pages =        "864--883",
  month =        nov,
  year =         "2006",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1196636.1196641",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:45 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The generation of efficient addressing code is a
                 central problem in compiling for processors with
                 restricted addressing modes, like digital signal
                 processors (DSPs). Offset assignment (OA) is the
                 problem of allocating scalar variables to memory, so as
                 to minimize the need of addressing instructions. This
                 problem is called simple offset assignment (SOA) when a
                 single address register is available, and general
                 offset assignment (GOA) when more address registers are
                 used. This paper shows how variables' liveness
                 information can be used to dramatically reduce the
                 addressing instructions required to access local
                 variables on the program stack. Two techniques that
                 make effective use of variable coalescing to solve SOA
                 and GOA are described, namely coalescing SOA (CSOA) and
                 coalescing GOA (CGOA). In addition, a thorough
                 comparison between these algorithms and others
                 described in the literature is presented. The
                 experimental results, when compiling MediaBench
                 benchmark programs with the LANCE compiler, reveal a
                 very significant improvement of the proposed techniques
                 over the other available solutions to the problem.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "address registers; autoincrement addressing modes;
                 DSPs; register allocation; stack offset assignment;
                 variable coalescing",
}

%%% Guest editorial, TECS 6(1), February 2007, article number 1.  The
%%% entry carries no abstract or keywords field.  In pages, the "1:"
%%% prefix repeats articleno; the "??" final page looks like a
%%% placeholder for an unknown value (TODO confirm against the file's
%%% conventions).
@Article{Whalley:2007:GE,
  author =       "David Whalley",
  title =        "Guest {Editorial}",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1210268.1216577",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

%%% TECS 6(1), February 2007, article 2: asynchronous software thread
%%% integration (ASTI) for software-implemented protocol controllers,
%%% demonstrated on J1850.  pages uses the articleno:page form; the
%%% "??" end page appears to be a placeholder for an unknown value
%%% (TODO confirm).
@Article{Kumar:2007:ESI,
  author =       "Nagendra J. Kumar and Vasanth Asokan and Siddhartha
                 Shivshankar and Alexander G. Dean",
  title =        "Efficient software implementation of embedded
                 communication protocol controllers using asynchronous
                 software thread integration with time- and
                 space-efficient procedure calls",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1210268.1210270",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The overhead of context switching limits efficient
                 scheduling of multiple concurrent threads on a
                 uniprocessor when real-time requirements exist. A
                 software-implemented protocol controller may be
                 crippled by this problem. The available idle time may
                 be too short to recover through context switching, so
                 only the primary thread can execute during message
                 activity, slowing the secondary threads and potentially
                 missing deadlines. Asynchronous software thread
                 integration (ASTI) uses coroutine calls and
                 integration, letting threads make independent progress
                 efficiently, and reducing the needed context switches.
                 We demonstrate the methods with a software
                 implementation of an automotive communication protocol
                 (J1850) and several secondary threads.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "asynchronous software thread integration; fine-grain
                 concurrency; hardware to software migration; J1850;
                 software-implemented communication protocol
                 controllers",
}

%%% TECS 6(1), February 2007, article 3: power-efficient prefetching
%%% via bit-differential offset assignment.  Percent signs in the
%%% abstract are written as \% so the text is safe to typeset.  The
%%% "3:" prefix in pages matches articleno; the "??" end page appears
%%% to be an unknown-value placeholder (TODO confirm).
@Article{Zhuang:2007:PEP,
  author =       "Xiaotong Zhuang and Santosh Pande",
  title =        "Power-efficient prefetching for embedded processors",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1210268.1210271",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Because of stringent power constraints, aggressive
                 latency-hiding approaches, such as prefetching, are
                 absent in the state-of-the-art embedded processors.
                 There are two main reasons that make prefetching power
                 inefficient. First, compiler-inserted prefetch
                 instructions increase code size and, therefore, could
                 increase I-cache power. Second, inaccurate prefetching
                 (especially for hardware prefetching) leads to high
                 D-cache power consumption because of useless accesses.
                 In this work, we show that it is possible to support
                 power-efficient prefetching through bit-differential
                 offset assignment. We target the prefetching of
                 relocatable stack variables with a high degree of
                 precision. By assigning the offsets of stack variables
                 in such a way that most consecutive addresses differ by
                 1 bit, we can prefetch them with compact prefetch
                 instructions to save I-cache power. The compiler first
                 generates an access graph of consecutive memory
                 references and then attempts a layout of the memory
                 locations in the smallest hypercube. Each dimension of
                 the hypercube represents a 1-bit differential
                 addressing. The embedding is carried out in as compact
                 a hypercube as possible in order to save memory space.
                 Each load/store instruction carries a hint regarding
                 prefetching the next memory reference by encoding its
                 differential address with respect to the current one.
                 To reduce D-cache power cost, we further attempt to
                 assign offsets so that most of the consecutive accesses
                 map to the same cache line. Our prefetching is done
                 using a one entry line buffer [Wilson et al. 1996].
                 Consequently, many look-ups in D-cache reduce to
                 incremental ones. This results in D-cache activity
                 reduction and power savings. Our prefetcher requires
                 both compiler and hardware support. In this paper, we
                 provide implementation on the processor model close to
                 ARM with small modification to the ISA. We tackle
                 issues such as out-of-order commit, predication, and
                 speculation through simple modifications to the
                 processor pipeline on noncritical paths. Our goal in
                 this work is to boost performance while
                 maintaining/lowering power consumption. Our results
                 show 12\% speedup and slight power reduction. The
                 runtime virtual space loss for stack and static data is
                 about 11.8\%.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "bit-differential addressing; data prefetching;
                 embedded processors; offset assignment",
}

%%% TECS 6(1), February 2007, article 4: the XTREM power/performance
%%% simulator for the Intel XScale core.  Braced words in the title
%%% (XTREM, Intel XScale, Design) are protected from style-driven case
%%% changes.  pages uses the articleno:page form with a "??" end page
%%% that appears to be an unknown-value placeholder (TODO confirm).
@Article{Contreras:2007:XPP,
  author =       "Gilberto Contreras and Margaret Martonosi and Jinzhang
                 Peng and Guei-Yuan Lueh and Roy Ju",
  title =        "The {XTREM} power and performance simulator for the
                 {Intel XScale} core: {Design} and experiences",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1210268.1210272",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Managing power concerns in microprocessors has become
                 a pressing research problem across the domains of
                 computer architecture, CAD, and compilers. As a result,
                 several parameterized cycle-level power simulators have
                 been introduced. While these simulators can be quite
                 useful for microarchitectural studies, their generality
                 limits how accurate they can be for any one chip
                 family. Furthermore, their hardware focus means that
                 they do not explicitly enable studying the interaction
                 of different software layers, such as Java applications
                 and their underlying runtime system software. This
                 paper describes and evaluates XTREM, a power-simulation
                 tool tailored for the Intel XScale microarchitecture.
                 In building XTREM, our goals were to develop a
                 microarchitecture simulator that, while still offering
                 size parameterizations for cache and other structures,
                 more accurately reflected a realistic processor
                 pipeline. We present a detailed set of validations
                 based on multimeter power measurements and hardware
                 performance counter sampling. XTREM exhibits an average
                 performance error of only 6.5\% and an even smaller
                 average power error: 4\%. The paper goes on to present
                 an application study enabled by the simulator. Namely,
                 we use XTREM to produce an energy consumption breakdown
                 for Java CDC and CLDC applications. Our simulator
                 measurements indicate that a large percentage of the
                 total energy consumption (up to 35\%) is devoted to the
                 virtual machine's support functions.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Intel XScale technology; Java; power measurements;
                 power modeling",
}

%%% TECS 6(1), February 2007, article 5: link-time compaction and
%%% optimization for ARM executables.  Two-word surnames in the author
%%% list (De Sutter, Van Put, De Bus, De Bosschere) are braced so each
%%% is kept together as a single last name.  The "??" end page in
%%% pages appears to be an unknown-value placeholder (TODO confirm).
@Article{DeSutter:2007:LTC,
  author =       "Bjorn {De Sutter} and Ludo {Van Put} and Dominique
                 Chanet and Bruno {De Bus} and Koen {De Bosschere}",
  title =        "Link-time compaction and optimization of {ARM}
                 executables",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1210268.1210273",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The overhead in terms of code size, power consumption,
                 and execution time caused by the use of precompiled
                 libraries and separate compilation is often
                 unacceptable in the embedded world, where real-time
                 constraints, battery life-time, and production costs
                 are of critical importance. In this paper, we present
                 our link-time optimizer for the ARM architecture. We
                 discuss how we can deal with the peculiarities of the
                 ARM architecture related to its visible program counter
                 and how the introduced overhead can to a large extent
                 be eliminated. Our link-time optimizer is evaluated
                 with four tool chains, two proprietary ones from ARM
                 and two open ones based on GNU GCC. When used with
                 proprietary tool chains from ARM Ltd., our link-time
                 optimizer achieved average code size reductions of 16.0
                 and 18.5\%, while the programs have become 12.8 and
                 12.3\% faster, and 10.7 to 10.1\% more energy
                 efficient. Finally, we show how the incorporation of
                 link-time optimization in tool chains may influence
                 library interface design.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "compaction; linker; optimization; performance",
}

%%% TECS 6(1), February 2007, article 6: the Molen compiler for
%%% reconfigurable processors.  The first author is entered unbraced
%%% as "Elena Moscu Panainte", so BibTeX takes only "Panainte" as the
%%% surname, matching the citation key.  The abstract uses inline TeX
%%% math for the multiplication sign.  The "??" end page in pages
%%% appears to be an unknown-value placeholder (TODO confirm).
@Article{Panainte:2007:MCR,
  author =       "Elena Moscu Panainte and Koen Bertels and Stamatis
                 Vassiliadis",
  title =        "The {Molen} compiler for reconfigurable processors",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1210268.1210274",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we describe the compiler developed to
                 target the Molen reconfigurable processor and
                 programming paradigm. The compiler automatically
                 generates optimized binary code for C applications,
                 based on pragma annotation of the code executed on the
                 reconfigurable hardware. For the IBM PowerPC 405
                 processor included in the Virtex II Pro platform FPGA,
                 we implemented code generation, register, and stack
                 frame allocation following the PowerPC EABI (embedded
                 application binary interface). The PowerPC backend has
                 been extended to generate the appropriate instructions
                 for the reconfigurable hardware and data transfer,
                 taking into account the information of the specific
                 hardware implementations and system. Starting with an
                 annotated C application, a complete design flow has
                 been integrated to generate the executable bitstream
                 for the reconfigurable processor. The flexible design
                 of the proposed infrastructure allows to consider the
                 special features of the reconfigurable architectures.
                 In order to hide the reconfiguration latencies, we
                 implemented an instruction-scheduling algorithm for the
                 dynamic hardware configuration instructions. The
                 algorithm schedules, in advance, the hardware
                 configuration instructions, taking into account the
                 conflicts for the reconfigurable hardware resources
                 (FPGA area) between the hardware operations. To verify
                 the Molen compiler, we used the multimedia video frame
                 M-JPEG encoder of which the extended discrete cosine
                 transform (DCT*) function was mapped on the FPGA. We
                 obtained an overall speedup of 2.5 (about 84\%
                 efficiency over the maximal theoretical speedup of
                 2.96). The performance efficiency is achieved using
                 automatically generated nonoptimized DCT* hardware
                 implementation. The instruction-scheduling algorithm
                 has been tested for DCT, quantization, and VLC
                 operations. Based on simulation results, we determine
                 that, while a simple scheduling produces a significant
                 performance decrease, our proposed scheduling
                 contributes for up to $16\times$ M-JPEG encoder
                 speedup.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "FPGA; instruction scheduling; reconfigurable
                 computing",
}

%%% TECS 6(1), February 2007, article 7: worst-case response time
%%% (WCRT) analysis for preemptive multitasking systems with an L1
%%% cache.  pages uses the articleno:page form; the "??" end page
%%% appears to be an unknown-value placeholder (TODO confirm).
@Article{Tan:2007:TAP,
  author =       "Yudong Tan and Vincent Mooney",
  title =        "Timing analysis for preemptive multitasking real-time
                 systems with caches",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1210268.1210275",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we propose an approach to estimate the
                 worst-case response time (WCRT) of each task in a
                 preemptive multitasking single-processor real-time
                 system utilizing an L1 cache. The approach combines
                 intertask cache-eviction analysis and intratask
                 cache-access analysis to estimate the number of cache
                 lines that can possibly be evicted by the preempting
                 task and also be accessed again by the preempted task
                 after preemptions (thus requiring the preempted task to
                 reload the cache line(s)). This cache-reload delay
                 caused by preempting task(s) is then incorporated into
                 WCRT analysis. Three sets of applications with up to
                 six concurrent tasks running are used to test our
                 approach. The experimental results show that our
                 approach can tighten the WCRT estimate by up to 32\%
                 ($1.4\times$) over prior state-of-the-art.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "real-time; worst-case response time",
}

%%% TECS 6(1), February 2007, article 8: safety verification of
%%% nonlinear hybrid systems by abstraction refinement with
%%% interval-constraint propagation.  pages uses the articleno:page
%%% form; the "??" end page appears to be an unknown-value placeholder
%%% (TODO confirm).
@Article{Ratschan:2007:SVH,
  author =       "Stefan Ratschan and Zhikun She",
  title =        "Safety verification of hybrid systems by constraint
                 propagation-based abstraction refinement",
  journal =      j-TECS,
  volume =       "6",
  number =       "1",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1210268.1210276",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:20:58 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper deals with the problem of safety
                 verification of nonlinear hybrid systems. We start from
                 a classical method that uses interval arithmetic to
                 check whether trajectories can move over the boundaries
                 in a rectangular grid. We put this method into an
                 abstraction refinement framework and improve it by
                 developing an additional refinement step that employs
                 interval-constraint propagation to add information to
                 the abstraction without introducing new grid elements.
                 Moreover, the resulting method allows switching
                 conditions, initial states, and unsafe states to be
                 described by complex constraints, instead of sets that
                 correspond to grid elements. Nevertheless, the method
                 can be easily implemented, since it is based on a
                 well-defined set of constraints, on which one can run
                 any constraint propagation-based solver. Tests of such
                 an implementation are promising.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "constraint propagation; hybrid systems; intervals",
}

%%% Guest editorial for the special issue on software and compilers
%%% for embedded systems, TECS 6(2), May 2007, article number 9.  The
%%% entry carries no abstract or keywords field.  In pages, the "9:"
%%% prefix repeats articleno; the "??" final page looks like a
%%% placeholder for an unknown value (TODO confirm).
@Article{Schepers:2007:GEI,
  author =       "Henk Schepers",
  title =        "Guest editorial: {Introduction} to the special issue
                 on software and compilers for embedded systems",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1234675.1234676",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

%%% TECS 6(2), May 2007, article 10: selective code transformation
%%% for dual instruction set processors, trading code size (16-bit
%%% reduced set) against execution time (32-bit full set).  pages
%%% uses the articleno:page form; the "??" end page appears to be an
%%% unknown-value placeholder (TODO confirm).
@Article{Lee:2007:SCT,
  author =       "Sheayun Lee and Jaejin Lee and Chang Yun Park and Sang
                 Lyul Min",
  title =        "Selective code transformation for dual instruction set
                 processors",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "10:1--10:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1234675.1234677",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded systems are often constrained in terms of
                 both code size and execution time, because of a limited
                 amount of available memory and real-time nature of
                 applications. A dual instruction set processor, which
                 supports a reduced instruction set (16
                 bits/instruction), in addition to a full instruction
                 set (32 bits/instruction), allows an opportunity for a
                 tradeoff between these two design criteria.
                 Specifically, while the reduced instruction set can be
                 used to reduce code size by providing smaller
                 instructions, a program compiled into the reduced
                 instruction set typically runs slower than the same
                 program compiled into the full instruction set.
                 Motivated by this observation, we propose a code
                 generation technique that exploits this tradeoff
                 relationship by selectively using the two instruction
                 sets for different sections in the program. The
                 proposed technique, called selective code
                 transformation, not only provides a mechanism to enable
                 a flexible tradeoff between a program's code size and
                 its execution time, but also facilitates program
                 optimization toward enhancing its worst case
                 performance. The results from our experiments show that
                 our proposed technique can be effectively used to
                 fine-tune an application program on a spectrum of code
                 size and execution performance, which, in turn, enables
                 a system-wide optimization on memory space and
                 execution speed involving multiple applications.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "dual instruction set processors; mixed-width
                 instruction set architecture; reduced bit-width
                 instruction set architecture",
}

%%% TECS 6(2), May 2007, article 11: loop-based strategies for
%%% reducing branch predictor leakage energy.  pages uses the
%%% articleno:page form; the "??" end page appears to be an
%%% unknown-value placeholder (TODO confirm).
@Article{Zhang:2007:RBP,
  author =       "Wei Zhang and Bramha Allu",
  title =        "Reducing branch predictor leakage energy by exploiting
                 loops",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "11:1--11:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1234675.1234678",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the scaling of technology, leakage energy will
                 become the dominant source of energy consumption.
                 Besides cache memories, branch predictors are among the
                 largest on-chip array structures and consume nontrivial
                 leakage energy. This paper proposes two cost-effective
                 loop-based strategies to reduce the branch predictor
                 leakage without impacting prediction accuracy or
                 performance. The loop-based approaches exploit the fact
                 that loops usually only contain a small number of
                 instructions and, hence, even fewer branch instructions
                 while taking a significant fraction of the execution
                 time. Consequently, all the nonactive entries of branch
                 predictors can be placed into the low leakage mode
                 during the loop execution in order to reduce leakage
                 energy. Compiler and circuit supports are discussed to
                 implement the proposed leakage-reduction strategies.
                 Compared to the recently proposed decay-based approach,
                 our experimental results show that the loop-based
                 approach can extract 16.2\% more dead time of the
                 branch predictor, on average, leading to more leakage
                 energy savings without impacting the branch prediction
                 accuracy and performance.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "branch prediction; compiler; leakage energy",
}

@Article{Scharwaechter:2007:AAE,
  author =       "Hanno Scharwaechter and David Kammler and Andreas
                 Wieferink and Manuel Hohenauer and Kingshuk Karuri and
                 Jianjiang Ceng and Rainer Leupers and Gerd Ascheid and
                 Heinrich Meyr",
  title =        "{ASIP} architecture exploration for efficient {IPSec}
                 encryption: a case study",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "12:1--12:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1234675.1234679",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Application-Specific Instruction-Set Processors
                 (ASIPs) are becoming increasingly popular in the world
                 of customized, application-driven System-on-Chip (SoC)
                 designs. Efficient ASIP design requires an iterative
                 architecture exploration loop---gradual refinement of
                 the processor architecture starting from an initial
                 template. To accomplish this task, design automation
                 tools are used to detect bottlenecks in embedded
                 applications, to implement application-specific
                 processor instructions, and to automatically generate
                 the required software tools (such as instruction-set
                 simulator, C-compiler, assembler, and profiler), as
                 well as to synthesize the hardware. This paper
                 describes an architecture exploration loop for an ASIP
                 coprocessor that implements common encryption
                 functionality used in symmetric block cipher algorithms
                 for internet protocol security (IPSec). The coprocessor
                 is accessed via shared memory and, as a consequence,
                 our approach is easily adaptable to arbitrary main
                 processor architectures. This paper presents the
                 extended version of our case study that has been
                 already published at the SCOPES conference in 2004. In
                 both papers, a MIPS architecture is used as the main
                 processor and Blowfish as encryption algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "ADL; ASIP; computer-aided design; IPSec",
}

@Article{Turjan:2007:CIC,
  author =       "Alexandru Turjan and Bart Kienhuis and Ed
                 Deprettere",
  title =        "Classifying interprocess communication in process
                 network representation of nested-loop programs",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "13:1--13:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1234675.1234680",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "New embedded signal-processing architectures are
                 emerging that are composed of loosely coupled
                 heterogeneous components like CPUs or DSPs, specialized
                 IP cores, reconfigurable units, or memories. We believe
                 that these architectures should be programmed using the
                 process network model of computation. To ease the
                 mapping of applications, we are developing the Compaan
                 compiler that automatically derives a process network
                 (PN) description from an application written in Matlab
                 or C. In this paper, we investigate a particular
                 problem in nested loop programs, which is about
                 classifying the interprocess communication in the PN
                 representation of the nested loop program. The global
                 memory arrays present in the code have to be replaced
                 by a distributed communication structure used for
                 communicating data between the network processes. We
                 show that four types of communication exist, each
                 exhibiting different requirements when realizing them
                 in hardware or software. We first present two compile
                 time tests that are based on integer linear programming
                 to decide the type of the communication. In the second
                 part of this paper, we present alternative
                 classification techniques that have polynomial
                 complexity. However, in some cases, those techniques do
                 not give a definitive answer and the ILP tests have to
                 be applied. All present tests are combined in a hybrid
                 classification scheme that correctly classifies the
                 interprocess communication. In only 5\% of the cases to
                 classify, we have to rely on integer linear programming
                 while, in the remaining 95\%, the alternative
                 techniques presented in this paper are able to
                 correctly classify each case. The hybrid classification
                 scheme has become an important part of our Compaan
                 compiler.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "hybrid classification approach; integer linear
                 programming; matrix manipulations; static analysis",
}

@Article{Ko:2007:BSA,
  author =       "Ming-Yung Ko and Praveen K. Murthy and Shuvra S.
                 Bhattacharyya",
  title =        "Beyond single-appearance schedules: {Efficient DSP}
                 software synthesis using nested procedure calls",
  journal =      j-TECS,
  volume =       "6",
  number =       "2",
  pages =        "14:1--14:??",
  month =        may,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1234675.1234681",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Synthesis of digital signal-processing (DSP) software
                 from dataflow-based formal models is an effective
                 approach for tackling the complexity of modern DSP
                 applications. In this paper, an efficient method is
                 proposed for applying subroutine call instantiation of
                 module functionality when synthesizing embedded
                 software from a dataflow specification. The technique
                 is based on a novel recursive decomposition of
                 subgraphs in a cluster hierarchy that is optimized for
                 low buffer size. Applying this technique, one can
                 achieve significantly lower buffer sizes than what is
                 available for minimum code size inlined schedules,
                 which have been the emphasis of prior work on software
                 synthesis. Furthermore, it is guaranteed that the
                 number of procedure calls in the synthesized program is
                 polynomially bounded in the size of the input dataflow
                 graph, even though the number of module invocations may
                 increase exponentially. This recursive decomposition
                 approach provides an efficient means for integrating
                 subroutine-based module instantiation into the design
                 space of DSP software synthesis. The experimental
                 results demonstrate a significant improvement in buffer
                 cost, especially for more irregular multirate DSP
                 applications, with moderate code and execution time
                 overhead.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "block diagram compiler; design methodology; embedded
                 systems; hierarchical graph decomposition; memory
                 optimization; procedural implementation; synchronous
                 dataflow",
}

@Article{Hua:2007:PDM,
  author =       "Shaoxiong Hua and Gang Qu and Shuvra S.
                 Bhattacharyya",
  title =        "Probabilistic design of multimedia embedded systems",
  journal =      j-TECS,
  volume =       "6",
  number =       "3",
  pages =        "15:1--15:??",
  month =        jul,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1275986.1275987",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:49:41 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we propose the novel concept of
                 probabilistic design for multimedia embedded systems,
                 which is motivated by the challenge of how to design,
                 but not overdesign, such systems while systematically
                 incorporating performance requirements of multimedia
                 application, uncertainties in execution time, and
                 tolerance for reasonable execution failures. Unlike
                 most present techniques that are based on either worst-
                 or average-case execution times of application tasks,
                 where the former guarantees the completion of each
                 execution, but often leads to overdesigned systems, and
                 the latter fails to provide any completion guarantees,
                 the proposed probabilistic design method takes
                 advantage of unique features mentioned above of
                 multimedia systems to relax the rigid hardware
                 requirements for software implementation and avoid
                 overdesigning the system. In essence, this relaxation
                 expands the design space and we further develop an
                 off-line on-line minimum effort algorithm for quick
                 exploration of the enlarged design space at early
                 design stages. This is the first step toward our goal
                 of bridging the gap between real-time analysis and
                 embedded software implementation for rapid and economic
                 multimedia system design. It is our belief that the
                 proposed method has great potential in reducing system
                 resource while meeting performance requirements. The
                 experimental results confirm this as we achieve
                 significant saving in system's energy consumption to
                 provide a statistical completion ratio guarantee (i.e.,
                 the expected number of completions over a large number
                 of iterations is greater than a given value).",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "completion ratio; energy minimization;
                 hardware/software codesign; multiple voltage;
                 probabilistic design; soft real-time system",
}

@Article{Koushanfar:2007:TMC,
  author =       "Farinaz Koushanfar and Abhijit Davare and David T.
                 Nguyen and Alberto Sangiovanni-Vincentelli and Miodrag
                 Potkonjak",
  title =        "Techniques for maintaining connectivity in wireless
                 ad-hoc networks under energy constraints",
  journal =      j-TECS,
  volume =       "6",
  number =       "3",
  pages =        "16:1--16:??",
  month =        jul,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1275986.1275988",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:49:41 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Distributed wireless systems (DWSs) are emerging as
                 the enabler for next-generation wireless applications.
                 There is a consensus that DWS-based applications, such
                 as pervasive computing, sensor networks, wireless
                 information networks, and speech and data communication
                 networks, will form the backbone of the next
                 technological revolution. Simultaneously, with great
                 economic, industrial, consumer, and scientific
                 potential, DWSs pose numerous technical challenges.
                 Among them, two are widely considered as crucial:
                 autonomous localized operation and minimization of
                 energy consumption. We address the fundamental problem
                 of how to maximize the lifetime of the network using
                 only local information, while preserving network
                 connectivity. We start by introducing the care-free
                 sleep (CS) Theorem that provides provably optimal
                 conditions for a node to go into sleep mode while
                 ensuring that global connectivity is not affected. The
                 CS theorem is the basis for an efficient localized
                 algorithm that decides which nodes will go into
                 sleep mode and for how long. We have also developed
                 mechanisms for collecting neighborhood information and
                 for the coordination of distributed energy minimization
                 protocols. The effectiveness of the approach is
                 demonstrated using a comprehensive study of the
                 performance of the algorithm over a wide range of
                 network parameters. Another important highlight is the
                 first mathematical and Monte Carlo analysis that
                 establishes the importance of considering nodes within
                 a small number of hops in order to preserve energy.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "ad-hoc networks; connectivity; energy management; low
                 power; power management; sleeping coordination",
}

@Article{Wagner:2007:HSI,
  author =       "Fl{\'a}vio R. Wagner and Wander Ces{\'a}rio and Ahmed
                 A. Jerraya",
  title =        "Hardware\slash software {IP} integration using the
                 {ROSES} design environment",
  journal =      j-TECS,
  volume =       "6",
  number =       "3",
  pages =        "17:1--17:??",
  month =        jul,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1275986.1275989",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:49:41 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Considering current time-to-market pressures, IP reuse
                 is mandatory for the design of complex embedded
                 systems-on-chip (SoC). The integration of IP components
                 into a given design is the most complex task in the
                 whole reuse process. This paper describes the IP
                 integration approach implemented in the ROSES design
                 environment, which presents a unique combination of
                 features that enhance IP reuse: automatic assembly of
                 interfaces between heterogeneous software and hardware
                 IP components; easy adaptation to different on-chip
                 communication structures and bus and core standards;
                 generation of customized and minimal OSs for
                 programmable components; and an
                 architecture-independent high-level API embedded into
                 SystemC that makes application software independent
                 from system implementation. Application code is written
                 by using communication functions available in this API.
                 ROSES automatically assembles wrappers that implement
                 these functions, such that the application code does
                 not need to be modified in order to run in the final
                 synthesized system.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "IP integration; systems-on-chip",
}

@Article{Lee:2007:LBB,
  author =       "Sang-Won Lee and Dong-Joo Park and Tae-Sun Chung and
                 Dong-Ho Lee and Sangwon Park and Ha-Joo Song",
  title =        "A log buffer-based flash translation layer using
                 fully-associative sector translation",
  journal =      j-TECS,
  volume =       "6",
  number =       "3",
  pages =        "18:1--18:??",
  month =        jul,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1275986.1275990",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:49:41 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Flash memory is being rapidly deployed as data storage
                 for mobile devices such as PDAs, MP3 players, mobile
                 phones, and digital cameras, mainly because of its low
                 electronic power, nonvolatile storage, high
                 performance, physical stability, and portability. One
                 disadvantage of flash memory is that prewritten data
                 cannot be dynamically overwritten. Before overwriting
                 prewritten data, a time-consuming erase operation on
                 the used blocks must precede, which significantly
                 degrades the overall write performance of flash memory.
                 In order to solve this ``erase-before-write'' problem,
                 the flash memory controller can be integrated with a
                 software module, called ``flash translation layer
                 (FTL).'' Among many FTL schemes available, the log
                 block buffer scheme is considered to be optimum. With
                 this scheme, a small number of log blocks, a kind of
                 write buffer, can improve the performance of write
                 operations by reducing the number of erase operations.
                 However, this scheme can suffer from low space
                 utilization of log blocks. In this paper, we show that
                 there is much room for performance improvement in the
                 log buffer block scheme, and propose an enhanced log
                 block buffer scheme, called FAST (full associative
                 sector translation). Our FAST scheme improves the space
                 utilization of log blocks using fully-associative
                 sector translations for the log block sectors. We also
                 show empirically that our FAST scheme outperforms the
                 pure log block buffer scheme.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "address translation; associative mapping; flash
                 memory; FTL; log blocks",
}

@Article{Wu:2007:EBT,
  author =       "Chin-Hsien Wu and Tei-Wei Kuo and Li Ping Chang",
  title =        "An efficient {B-tree} layer implementation for
                 flash-memory storage systems",
  journal =      j-TECS,
  volume =       "6",
  number =       "3",
  pages =        "19:1--19:??",
  month =        jul,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1275986.1275991",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:49:41 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the significant growth of the markets for
                 consumer electronics and various embedded systems,
                 flash memory is now an economic solution for storage
                 systems design. Because index structures require
                 intensively fine-grained updates/modifications,
                 block-oriented access over flash memory could introduce
                 a significant number of redundant writes. This might
                 not only severely degrade the overall performance, but
                 also damage the reliability of flash memory. In this
                 paper, we propose a very different approach, which can
                 efficiently handle fine-grained updates/modifications
                 caused by B-tree index access over flash memory. The
                 implementation is done directly over the flash
                 translation layer (FTL); hence, no modifications to
                 existing application systems are needed. We demonstrate
                 that when index structures are adopted over flash
                 memory, the proposed methodology can significantly
                 improve the system performance and, at the same time,
                 reduce both the overhead of flash-memory management and
                 the energy dissipation. The average response time of
                 record insertions and deletions was also significantly
                 reduced.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "B-tree; database systems; embedded systems; flash
                 memory; storage systems",
}

@Article{Xie:2007:ISP,
  author =       "Tao Xie and Xiao Qin",
  title =        "Improving security for periodic tasks in embedded
                 systems through scheduling",
  journal =      j-TECS,
  volume =       "6",
  number =       "3",
  pages =        "20:1--20:??",
  month =        jul,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1275986.1275992",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:49:41 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "While many scheduling algorithms for periodic tasks
                 ignore security requirements posed by sensitive
                 applications and are, consequently, unable to perform
                 properly in embedded systems with security constraints,
                 in this paper, we present an approach to scheduling
                 periodic tasks in embedded systems subject to security
                 and timing constraints. We design a necessary and
                 sufficient feasibility check for a set of periodic
                 tasks with security requirements. With the feasibility
                 test in place, we propose a scheduling algorithm, or
                 SASES (security-aware scheduling for embedded systems),
                 which accounts for both security and timing
                 requirements. SASES judiciously distributes slack times
                 among a variety of security services for a set of
                 periodic tasks, thereby optimizing security for
                 embedded systems without sacrificing schedulability. To
                 demonstrate the effectiveness of SASES, we apply the
                 proposed SASES to real-world embedded systems such as
                 an automated flight control system. We show, through
                 extensive simulations, that SASES is able to maximize
                 security for embedded systems while guaranteeing
                 timeliness. In particular, SASES significantly improves
                 security over three baseline algorithms by up to
                 107\%.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "embedded systems; periodic tasks; real-time systems;
                 scheduling; security-sensitive applications",
}

@Article{Gupta:2007:ISL,
  author =       "Rajiv Gupta and Yunheung Paek",
  title =        "Introduction to the special {LCTES'05} issue",
  journal =      j-TECS,
  volume =       "6",
  number =       "4",
  pages =        "21:1--21:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1274858.1274859",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Gay:2007:SDP,
  author =       "David Gay and Philip Levis and David Culler",
  title =        "Software design patterns for {TinyOS}",
  journal =      j-TECS,
  volume =       "6",
  number =       "4",
  pages =        "22:1--22:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1274858.1274860",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present design patterns used by software components
                 in the TinyOS sensor network operating system. They
                 differ significantly from traditional software design
                 patterns because of the constraints of sensor networks
                 and because of TinyOS's focus on static allocation and
                 whole-program composition. We describe how nesC has
                 evolved to support these design patterns by including a
                 few simple language primitives and optimizations.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "design patterns; embedded systems; nesC; TinyOS",
}

@Article{Chanet:2007:ARM,
  author =       "Dominique Chanet and Bjorn {De Sutter} and Bruno {De
                 Bus} and Ludo {Van Put} and Koen {De Bosschere}",
  title =        "Automated reduction of the memory footprint of the
                 {Linux} kernel",
  journal =      j-TECS,
  volume =       "6",
  number =       "4",
  pages =        "23:1--23:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1274858.1274861",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The limited built-in configurability of Linux can lead
                 to expensive code size overhead when it is used in the
                 embedded market. To overcome this problem, we propose
                 the application of link-time compaction and
                 specialization techniques that exploit the a priori
                 known, fixed runtime environment of many embedded
                 systems. In experimental setups based on the ARM XScale
                 and i386 platforms, the proposed techniques are able to
                 reduce the kernel memory footprint with over 16\%. We
                 also show how relatively simple additions to existing
                 binary rewriters can implement the proposed techniques
                 for a complex, very unconventional program, such as the
                 Linux kernel. We note that even after specialization, a
                 lot of seemingly unnecessary code remains in the kernel
                 and propose to reduce the footprint of this code by
                 applying code-compression techniques. This technique,
                 combined with the previous ones, reduces the memory
                 footprint with over 23\% for the i386 platform and 28\%
                 for the ARM platform. Finally, we pinpoint an important
                 code size growth problem when compaction and
                 compression techniques are combined on the ARM
                 platform.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "compaction; compression; Linux kernel; operating
                 system; specialization; system calls",
}

@Article{Sassone:2007:SSS,
  author =       "Peter G. Sassone and D. Scott Wills and Gabriel H.
                 Loh",
  title =        "Static strands: {Safely} exposing dependence chains
                 for increasing embedded power efficiency",
  journal =      j-TECS,
  volume =       "6",
  number =       "4",
  pages =        "24:1--24:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1274858.1274862",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modern embedded processors are designed to maximize
                 execution efficiency---the amount of performance
                 achieved per unit of energy dissipated while meeting
                 minimum performance levels. To increase this
                 efficiency, we propose utilizing static strands,
                 dependence chains without fan-out, which are exposed by
                 a compiler pass. These dependent instructions are
                 resequenced to be sequential and annotated to
                 communicate their location to the hardware.
                 Importantly, this modified application is binary
                 compatible and functionally identical to the original,
                 allowing transparent execution on a baseline processor.
                 However, these static strands can be easily collapsed
                 and optimized by simple processor modifications,
                 significantly reducing the workload energy. Results
                 show that over 30\% of MediaBench and Spec2000int
                 dynamic instructions can be collapsed, reducing issue
                 logic energy by 20\%, bypass energy 19\%, and register
                 file energy 14\%. In addition, by increasing the
                 effective capacity of pipeline resources by almost a
                 third, average IPC can be improved up to 15\%. This
                 performance gain can then be traded in for a lower
                 clock frequency to maintain a baseline level of
                 performance, further reducing energy.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "architecture; dependency collapsing; energy;
                 sequentiality",
}

@Article{Staschulat:2007:SPC,
  author =       {Jan Staschulat and Rolf Ernst},
  title =        {Scalable precision cache analysis for real-time
                 software},
  journal =      j-TECS,
  volume =       {6},
  number =       {4},
  pages =        {25:1--25:??},
  month =        sep,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1274858.1274863},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Thu Jun 12 15:21:30 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Caches are needed to increase the processor
                 performance, but the temporal behavior is difficult to
                 predict, especially in embedded systems with preemptive
                 scheduling. Current approaches use simplified
                 assumptions or propose complex analysis algorithms to
                 bound the cache-related preemption delay. In this
                 paper, a scalable preemption delay analysis for
                 associative instruction caches to control the analysis
                 precision and the time-complexity is proposed. An
                 accurate preemption delay calculation is integrated
                 into a cache-aware schedulability analysis. The
                 framework is evaluated in several experiments.},
  acknowledgement = ack-nhfb,
  articleno =    {25},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords =     {cache; embedded systems; preemptive scheduling;
                 worst-case execution time analysis},
}

@Article{Varma:2007:AFS,
  author =       "Ankush Varma and Bruce Jacob and Eric Debes and Igor
                 Kozintsev and Paul Klein",
  title =        "Accurate and fast system-level power modeling: an
                 {XScale}-based case study",
  journal =      j-TECS,
  volume =       "6",
  number =       "4",
  pages =        "26:1--26:??",
  month =        sep,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1274858.1274864",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Accurate and fast system modeling is central to the
                 rapid design space exploration needed for
                 embedded-system design. With fast, complex SoCs playing
                 a central role in such systems, system designers have
                 come to require MIPS-range simulation speeds and
                 near-cycle accuracy. The sophisticated simulation
                 frameworks that have been developed for high-speed
                 system performance modeling do not address power
                 consumption, although it is a key design constraint. In
                 this paper, we define a simulation-based methodology
                 for extending system performance-modeling frameworks to
                 also include power modeling. We demonstrate the use of
                 this methodology with a case study of a real, complex
                 embedded system, comprising the Intel XScale{\reg}
                 embedded microprocessor, its WMMX{\trademark} SIMD
                 coprocessor, L1 caches, SDRAM and the on-board address
                 and data buses. We describe detailed power models for
                 each of these components and validate them against
                 physical measurements from hardware, demonstrating that
                 such frameworks enable designers to model both power
                 and performance at high speeds without sacrificing
                 accuracy. Our results indicate that the power estimates
                 obtained are accurate within 5\% of physical
                 measurements from hardware, while simulation speeds
                 consistently exceed a million instructions per second
                 (MIPS).",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "embedded systems; power modeling; SystemC",
}

@Article{Carta:2007:CTA,
  author =       {Salvatore Carta and Andrea Alimonda and Alessandro
                 Pisano and Andrea Acquaviva and Luca Benini},
  title =        {A control theoretic approach to energy-efficient
                 pipelined computation in {MPSoCs}},
  journal =      j-TECS,
  volume =       {6},
  number =       {4},
  pages =        {27:1--27:??},
  month =        sep,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1274858.1274865},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Thu Jun 12 15:21:30 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {In this work, we describe a control theoretic approach
                 to dynamic voltage/frequency scaling (DVFS) in a
                 pipelined MPSoC architecture with soft real-time
                 constraints, aimed at minimizing energy consumption
                 with throughput guarantees. Theoretical analysis and
                 experiments carried out on a cycle-accurate,
                 energy-aware, and multiprocessor simulation platform
                 are provided. We give a dynamic model of the system
                 behavior which allows to synthesize linear and
                 nonlinear feedback control schemes for the run-time
                 adjustment of the core frequencies. We study the
                 characteristics of the proposed techniques in both
                 transient and steady-state conditions. Finally, we
                 compare the proposed feedback approaches and local DVFS
                 policies from an energy consumption viewpoint.},
  acknowledgement = ack-nhfb,
  articleno =    {27},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords =     {DVFS; feedback-control techniques; MPSoC; parallel
                 systems},
}

@Article{Crenshaw:2007:RIE,
  author =       {Tanya L. Crenshaw and Spencer Hoke and Ajay Tirumala
                 and Marco Caccamo},
  title =        {Robust implicit {EDF}: a wireless {MAC} protocol for
                 collaborative real-time systems},
  journal =      j-TECS,
  volume =       {6},
  number =       {4},
  pages =        {28:1--28:??},
  month =        sep,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1274858.1274866},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Thu Jun 12 15:21:30 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Advances in wireless technology have brought us closer
                 to extensive deployment of distributed real-time
                 embedded systems connected through a wireless channel.
                 The medium-access control (MAC) layer protocol is
                 critical in providing a real-time guarantee. We have
                 devised a real-time wireless MAC protocol, robust
                 implicit earliest deadline first, or RI-EDF. Packets
                 are transmitted according to EDF scheduling rules,
                 offering a protocol that implicitly avoids contention.
                 In the event of a packet loss or a node failure, every
                 node has the opportunity to recover the schedule based
                 on a static recovery priority, offering a protocol that
                 is robust with no central point of failure. We
                 demonstrate in simulations that RI-EDF provides better
                 goodput and lower packet loss than existing protocols
                 like 802.11 PCF and EDCF. In our implementation and
                 distributed control test-bed, we show that RI-EDF
                 provides better throughput than the TinyOS MAC-layer
                 protocol. Overall, RI-EDF provides predictable temporal
                 behavior with minimal impact on node failures, packet
                 losses, and noise in the channel.},
  acknowledgement = ack-nhfb,
  articleno =    {28},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords =     {earliest deadline first; medium-access control; real
                 time; wireless},
}

@Article{Quan:2007:EED,
  author =       {Gang Quan and Xiaobo Sharon Hu},
  title =        {Energy efficient {DVS} schedule for fixed-priority
                 real-time systems},
  journal =      j-TECS,
  volume =       {6},
  number =       {4},
  pages =        {29:1--29:??},
  month =        sep,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1274858.1274867},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Thu Jun 12 15:21:30 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Energy consumption has become an increasingly
                 important consideration in designing many real-time
                 embedded systems. Variable voltage processors, if used
                 properly, can dramatically reduce such system energy
                 consumption. In this paper, we present a technique to
                 determine voltage settings for a variable voltage
                 processor that utilizes a fixed-priority assignment to
                 schedule jobs. By exploiting more efficiently the
                 processor slack time, our approach can be more
                 effective in reducing the execution speed for real-time
                 tasks when necessary. Our approach also produces the
                 minimum constant voltage needed to feasibly schedule
                 the entire job set. With both randomly generated and
                 practical examples, our heuristic approach can achieve
                 the dynamic energy reduction very close to the
                 theoretically optimal one (within 2\%) with much less
                 computation cost.},
  acknowledgement = ack-nhfb,
  articleno =    {29},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords =     {dynamic voltage scaling; fixed-priority scheduling;
                 low power; real time},
}

@Article{Rao:2007:EOS,
  author =       {Ravishankar Rao and Sarma Vrudhula},
  title =        {Energy optimal speed control of a producer--consumer
                 device pair},
  journal =      j-TECS,
  volume =       {6},
  number =       {4},
  pages =        {30:1--30:??},
  month =        sep,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1274858.1274868},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Thu Jun 12 15:21:30 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {We propose a modular approach for minimizing the total
                 energy consumed by a pair of generic communicating
                 devices (producer--consumer scenario) by jointly
                 controlling their speed profiles. Each device (like a
                 CPU, or disk drive) is assumed to have a controllable
                 variable called its speed (e.g., a CPU's clock
                 frequency, a disk drive's spindle motor speed) that
                 affects its power consumption and performance (e.g.,
                 throughput, data transfer rate). The device and task
                 models we analyzed were inspired by applications like
                 CD recording (hard drive to CD drive data transfer) and
                 data processing (disk drive to CPU data transfer). The
                 proposed solution can be used for any pair of devices
                 with convex (for continuous speed sets) or W-convex (a
                 discrete version of a convex function for discrete
                 speed sets) power--speed relationships. For discrete
                 speed sets, the method operates directly on the
                 power--speed values and does not require an analytical
                 relationship between power and speed. The key to
                 solving the two-device optimization problem was the
                 observation that it could be split into two single
                 device parametric optimization problems, where the
                 parameters correspond to the common task that both the
                 devices must execute. The following divide-and-conquer
                 approach is proposed: [divide] the optimal speed policy
                 and energy consumption of each device is derived as an
                 analytical function of its task parameters; [conquer]
                 the optimal values of these parameters are found by
                 minimizing the sum of the parameterized energy
                 functions and plugged back into the parameterized speed
                 profiles. The main advantage of this approach is that
                 each device can be characterized independently and this
                 allows system designers to mix and match
                 manufacturer-supplied device energy curves to evaluate
                 and optimize different application scenarios. We
                 demonstrate our approach using three device
                 characterization examples (for a CD drive, hard drive,
                 and a CPU) and two application scenarios (CD recording,
                 MD5 checksum computation).},
  acknowledgement = ack-nhfb,
  articleno =    {30},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords =     {disk drive; energy optimization; joint optimization;
                 processor; speed control},
}

@Article{Loghi:2007:PMM,
  author =       {Mirko Loghi and Luca Benini and Massimo Poncino},
  title =        {Power macromodeling of {MPSoC} message passing
                 primitives},
  journal =      j-TECS,
  volume =       {6},
  number =       {4},
  pages =        {31:1--31:??},
  month =        sep,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1274858.1274869},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Thu Jun 12 15:21:30 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Estimating the energy consumption of software in
                 multiprocessor systems-on-chip (MPSoCs) is crucial for
                 enabling quick evaluations of both software and
                 hardware optimizations. However, high-level estimations
                 should be applicable at software level, possibly
                 constructing effective power models depending on
                 parameters that can be extracted directly from the
                 application characteristics. We propose a methodology
                 for accurate analysis of power consumption of
                 message-passing primitives in a MPSoC, and, in
                 particular, an energy model which, in spite of its
                 simplicity, allows to model the traffic-dependent
                 nature of energy consumption through the use of a
                 single, abstract parameter, namely, the size of the
                 message exchanged.},
  acknowledgement = ack-nhfb,
  articleno =    {31},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords =     {communication primitives; macromodeling;
                 multiprocessor; system-on-chip},
}

@Article{Kansal:2007:PME,
  author =       {Aman Kansal and Jason Hsu and Sadaf Zahedi and Mani B.
                 Srivastava},
  title =        {Power management in energy harvesting sensor
                 networks},
  journal =      j-TECS,
  volume =       {6},
  number =       {4},
  pages =        {32:1--32:??},
  month =        sep,
  year =         {2007},
  CODEN =        {????},
  DOI =          {http://doi.acm.org/10.1145/1274858.1274870},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  bibdate =      {Thu Jun 12 15:21:30 MDT 2008},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract =     {Power management is an important concern in sensor
                 networks, because a tethered energy infrastructure is
                 usually not available and an obvious concern is to use
                 the available battery energy efficiently. However, in
                 some of the sensor networking applications, an
                 additional facility is available to ameliorate the
                 energy problem: harvesting energy from the environment.
                 Certain considerations in using an energy harvesting
                 source are fundamentally different from that in using a
                 battery, because, rather than a limit on the maximum
                 energy, it has a limit on the maximum rate at which the
                 energy can be used. Further, the harvested energy
                 availability typically varies with time in a
                 nondeterministic manner. While a deterministic metric,
                 such as residual battery, suffices to characterize the
                 energy availability in the case of batteries, a more
                 sophisticated characterization may be required for a
                 harvesting source. Another issue that becomes important
                 in networked systems with multiple harvesting nodes is
                 that different nodes may have different harvesting
                 opportunity. In a distributed application, the same
                 end-user performance may be achieved using different
                 workload allocations, and resultant energy consumptions
                 at multiple nodes. In this case, it is important to
                 align the workload allocation with the energy
                 availability at the harvesting nodes. We consider the
                 above issues in power management for energy-harvesting
                 sensor networks. We develop abstractions to
                 characterize the complex time varying nature of such
                 sources with analytically tractable models and use them
                 to address key design issues. We also develop
                 distributed methods to efficiently use harvested energy
                 and test these both in simulation and experimentally on
                 an energy-harvesting sensor network, prototyped for
                 this work.},
  acknowledgement = ack-nhfb,
  articleno =    {32},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords =     {adaptive duty cycling; energy neutrality; Heliomote;
                 lifetime; power management},
}

@Article{Bueno:2007:RRP,
  author =       "David Bueno and Chris Conger and Alan D. George and
                 Ian Troxel and Adam Leko",
  title =        "{RapidIO} for radar processing in advanced space
                 systems",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "1:1--1:38",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1324969.1324970",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Space-based radar is a suite of applications that
                 presents many unique system design challenges. In this
                 paper, we investigate use of RapidIO, a new
                 high-performance embedded systems interconnect, in
                 addressing issues associated with the high network
                 bandwidth requirements of real-time ground moving
                 target indicator (GMTI), and synthetic aperture Radar
                 (SAR) applications in satellite systems. Using
                 validated simulation, we study several critical issues
                 related to the RapidIO network and algorithms under
                 study. The results show that RapidIO is a promising
                 platform for space-based radar using emerging
                 technology, providing network bandwidth to enable
                 parallel computation previously unattainable in an
                 embedded satellite system.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "ground-moving target indicator; RapidIO; space-based
                 radar; synthetic aperture radar",
}

@Article{Fei:2007:EOS,
  author =       "Yunsi Fei and Srivaths Ravi and Anand Raghunathan and
                 Niraj K. Jha",
  title =        "Energy-optimizing source code transformations for
                 operating system-driven embedded software",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "2:1--2:26",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1324969.1324971",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper proposes four types of source code
                 transformations for operating system (OS)-driven
                 embedded software programs to reduce their energy
                 consumption. Their key features include spanning of
                 process boundaries and minimization of the energy
                 consumed in the execution of OS
                 services---opportunities which are beyond the reach of
                 conventional compiler optimizations and source code
                 transformations. We have applied the proposed
                 transformations to several multiprocess benchmark
                 programs in the context of an embedded Linux OS running
                 on an Intel StrongARM processor. They achieve up to
                 37.9\% (23.8\%, on average) energy reduction compared
                 to highly compiler-optimized implementations.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "energy consumption; Linux; source code
                 transformations",
}

@Article{Zhu:2007:ESA,
  author =       "Yifan Zhu and Frank Mueller",
  title =        "Exploiting synchronous and asynchronous {DVS} for
                 feedback {EDF} scheduling on an embedded platform",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "3:1--3:26",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1324969.1324972",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Contemporary processors support dynamic voltage
                 scaling (DVS) to reduce power consumption by varying
                 processor voltage/frequency dynamically. We develop
                 power-aware feedback--DVS algorithms for hard real-time
                 systems that adapt to dynamically changing workloads.
                 The algorithms lower execution speed while guaranteeing
                 timing constraints. We study energy consumption for
                 synchronous and asynchronous DVS switching on a PowerPC
                 board. Energy, measured via data acquisition, is
                 reduced up to 70\% over na{\"\i}ve DVS for our feedback
                 scheme with 24\% peak savings over previous algorithms.
                 These results, albeit differing in quantity, confirm
                 trends observed under simulation. They are the first of
                 their kind on an embedded board.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "dynamic voltage scaling; feedback control; real-time
                 systems; scheduling",
}

@Article{Vera:2007:DCL,
  author =       "Xavier Vera and Bj{\"o}rn Lisper and Jingling Xue",
  title =        "Data cache locking for tight timing calculations",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "4:1--4:38",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1324969.1324973",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Caches have become increasingly important with the
                 widening gap between main memory and processor speeds.
                 Small and fast cache memories are designed to bridge
                 this discrepancy. However, they are only effective when
                 programs exhibit sufficient data locality. In addition,
                 caches are a source of unpredictability, resulting in
                 programs sometimes behaving in a different way than
                 expected. Detailed information about the number of
                 cache misses and their causes allows us to predict
                 cache behavior and to detect bottlenecks. Small
                 modifications in the source code may change memory
                 patterns, thereby altering the cache behavior. Code
                 transformations, which take the cache behavior into
                 account, might result in a high cache performance
                 improvement. However, cache memory behavior is very
                 hard to predict, thus making the task of optimizing and
                 timing cache behavior very difficult. This article
                 proposes and evaluates a new compiler framework that
                 times cache behavior for multitasking systems. Our
                 method explores the use of cache partitioning and
                 dynamic cache locking to provide worst-case performance
                 estimates in a safe and tight way for multitasking
                 systems. We use cache partitioning, which divides the
                 cache among tasks to eliminate intertask cache
                 interferences. We combine static cache analysis and
                 cache-locking mechanisms to ensure that all intratask
                 conflicts, and consequently, memory access times, are
                 exactly predictable. The results of our experiments
                 demonstrate the capability of our framework to describe
                 cache behavior at compile time. We compare our timing
                 approach with a system equipped with a nonpartitioned,
                 but statically, locked data cache. Our method
                 outperforms static cache locking for all analyzed task
                 sets under various cache architectures, demonstrating
                 that our fully predictable scheme does not compromise
                 the performance of the transformed programs.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "data cache analysis; embedded systems; safety critical
                 systems; worst-case execution time",
}

@Article{Armbruster:2007:RTJ,
  author =       "Austin Armbruster and Jason Baker and Antonio Cunei
                 and Chapman Flack and David Holmes and Filip Pizlo and
                 Edward Pla and Marek Prochazka and Jan Vitek",
  title =        "A real-time {Java} virtual machine with applications
                 in avionics",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "5:1--5:49",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1324969.1324974",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper reports on our experience with the
                 implementation of the Real-time Specification for Java
                 on the Ovm open source Java virtual machine. We
                 describe the architecture and main design decisions
                 involved in implementing real-time Java on Ovm. We
                 present the first use of Real-time Java in avionics in
                 the context of control software for a ScanEagle
                 Unmanned Aerial Vehicle.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "avionics; memory management; Real-Time Java; virtual
                 machines",
}

@Article{Mangeruca:2007:USU,
  author =       "Leonardo Mangeruca and Massimo Baleani and Alberto
                 Ferrari and Alberto Sangiovanni-Vincentelli",
  title =        "Uniprocessor scheduling under precedence constraints
                 for embedded systems design",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "6:1--6:30",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1324969.1324975",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this paper, we present a novel approach to the
                 constrained scheduling problem, while addressing a more
                 general class of constraints that arise from the timing
                 requirements on real-time embedded controllers. We
                 provide general necessary and sufficient conditions for
                 scheduling under precedence constraints and derive
                 sufficient conditions for two well-known scheduling
                 policies. We define mathematical problems that provide
                 optimum priority and deadline assignments, while
                 ensuring both precedence constraints and system's
                 schedulability. We show how these problems can be
                 relaxed to corresponding integer linear programming
                 (ILP) formulations leveraging on available solvers. The
                 results are demonstrated on a real design case.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "design of embedded systems; embedded software;
                 precedence constraints; real-time scheduling",
}

@Article{Bordoloi:2007:ISA,
  author =       "Unmesh D. Bordoloi and Samarjit Chakraborty",
  title =        "Interactive schedulability analysis",
  journal =      j-TECS,
  volume =       "7",
  number =       "1",
  pages =        "7:1--7:27",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1324969.1324976",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:21:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A typical design process for real-time embedded
                 systems involves choosing the values of certain system
                 parameters and performing a schedulability analysis to
                 determine whether all deadline constraints can be
                 satisfied. If such an analysis returns a negative
                 answer, then some of the parameters are modified and
                 the analysis is invoked once again. This iteration is
                 repeated until a schedulable design is obtained.
                 However, the schedulability analysis problem for most
                 task models is intractable (usually co-NP hard) and,
                 hence, such an iterative design process is often very
                 expensive. To get around this problem, we introduce the
                 concept of ``interactive'' schedulability analysis. It
                 is based on the observation that if only a small number
                 of system parameters are changed, then it is not
                 necessary to rerun the full schedulability analysis
                 algorithm, thereby making the iterative design process
                 considerably faster. We refer to this analysis as being
                 ``interactive'' because it is supposed to be run in an
                 interactive mode. This concept is fairly general and
                 can be applied to a wide variety of task models. In
                 this paper, we have chosen the recurring real-time task
                 model, because it can be used to represent realistic
                 applications from the embedded systems domain
                 (containing conditional branches and fine-grained
                 deadline constraints). Our experimental results show
                 that using our scheme can lead to more than $ 20 \times $
                 speedup for each invocation of the schedulability
                 analysis algorithm, compared to the case where the full
                 algorithm is run.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "interactive design; nonfunctional constraints;
                 performance debugging; recurring real-time task model;
                 schedulability analysis",
}

@Article{Ha:2008:IES,
  author =       "Soonhoi Ha and Kiyoung Choi and Taewhan Kim and
                 Krisztian Flautner and Sanglyul Min and Wang Yi",
  title =        "Introduction to {Embedded Systems Week 2006} special
                 issue",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1331331.1331332",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Kim:2008:EAC,
  author = {Minyoung Kim and Sudarshan Banerjee and Nikil Dutt and Nalini
    Venkatasubramanian},
  title = {Energy-aware cosynthesis of real-time multimedia applications
    on {MPSoCs} using heterogeneous scheduling policies},
  journal = j-TECS,
  volume = 7,
  number = 2,
  pages = {9:1--9:??},
  month = feb,
  year = 2008,
  CODEN = {????},
  DOI = {http://doi.acm.org/10.1145/1331331.1331333},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Real-time multimedia applications are increasingly being
    mapped onto MPSoC (multiprocessor system-on-chip) platforms
    containing hardware--software IPs (intellectual property), along
    with a library of common scheduling policies such as EDF, RM. The
    choice of a scheduling policy for each IP is a key decision that
    greatly affects the design's ability to meet real-time constraints,
    and also directly affects the energy consumed by the design. We
    present a cosynthesis framework for design space exploration that
    considers heterogeneous scheduling while mapping multimedia
    applications onto such MPSoCs. In our approach, we select a suitable
    scheduling policy for each IP such that system energy is
    minimized---our framework also includes energy-reduction techniques
    utilizing dynamic power management. Experimental results on a
    realistic multimode multimedia terminal application demonstrate that
    our approach enables us to select design points with up to 60.5\%
    reduced energy for a given area constraint, while meeting all
    real-time requirements. More importantly, our approach generates a
    tradeoff space between energy and cost allowing designers to
    comparatively evaluate multiple system level mappings.},
  acknowledgement = ack-nhfb,
  articleno = {9},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {cosynthesis; energy; MPSoC; real-time scheduling},
}

@article{Raman:2008:ASW,
  author = {Balaji Raman and Samarjit Chakraborty},
  title = {Application-specific workload shaping in multimedia-enabled
    personal mobile devices},
  journal = j-TECS,
  volume = 7,
  number = 2,
  pages = {10:1--10:??},
  month = feb,
  year = 2008,
  CODEN = {????},
  DOI = {http://doi.acm.org/10.1145/1331331.1331334},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Today, most personal mobile devices (e.g., cell phones and
    PDAs) are multimedia-enabled and support a variety of concurrently
    running applications, such as audio/video players, word processors,
    and web browsers. Media-processing applications are often
    computationally expensive and most of these devices typically have
    100--400-MHz processors. As a result, the user-perceived application
    response times are often poor when multiple applications are
    concurrently fired. In this paper, we show that by using
    application-specific dynamic buffering techniques, the workload of
    these applications can be suitably ``shaped'' to fit the available
    processor bandwidth. Our techniques are analogous to traffic
    shaping, which is widely used in communication networks to optimally
    utilize network bandwidth. Such shaping techniques have recently
    attracted a lot of attention in the context of embedded systems
    design (e.g., for dynamic voltage scaling). However, they have not
    been exploited for enhanced schedulability of multiple applications,
    as we do in this paper.},
  acknowledgement = ack-nhfb,
  articleno = {10},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {mobile devices; multimedia systems; schedulability
    analysis},
}

@Article{Egger:2008:DSM,
  author =       "Bernhard Egger and Jaejin Lee and Heonshik Shin",
  title =        "Dynamic scratchpad memory management for code in
                 portable systems with an {MMU}",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1331331.1331335",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this work, we present a dynamic memory allocation
                 technique for a novel, horizontally partitioned memory
                 subsystem targeting contemporary embedded processors
                 with a memory management unit (MMU). We propose to
                 replace the on-chip instruction cache with a scratchpad
                 memory (SPM) and a small minicache. Serializing the
                 address translation with the actual memory access
                 enables the memory system to access either only the SPM
                 or the minicache. Independent of the SPM size and based
                 solely on profiling information, a postpass optimizer
                 classifies the code of an application binary into a
                 pageable and a cacheable code region. The latter is
                 placed at a fixed location in the external memory and
                 cached by the minicache. The former, the pageable code
                 region, is copied on demand to the SPM before
                 execution. Both the pageable code region and the SPM
                 are logically divided into pages the size of an MMU
                 memory page. Using the MMU's pagefault exception
                 mechanism, a runtime scratchpad memory manager (SPMM)
                 tracks page accesses and copies frequently executed
                 code pages to the SPM before they get executed. In
                 order to minimize the number of page transfers from the
                 external memory to the SPM, good code placement
                 techniques become more important with increasing sizes
                 of the MMU pages. We discuss code-grouping techniques
                 and provide an analysis of the effect of the MMU's page
                 size on execution time, energy consumption, and
                 external memory accesses. We show that by using the
                 data cache as a victim buffer for the SPM, significant
                 energy savings are possible. We evaluate our SPM
                 allocation strategy with fifteen applications,
                 including H.264, MP3, MPEG-4, and PGP. The proposed
                 memory system requires 8\% less die area compared to a
                 fully-cached configuration. On average, we achieve a
                 31\% improvement in runtime performance and a 35\%
                 reduction in energy consumption with an MMU page size
                 of 256 bytes.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "code placement; compilers; heterogeneous memory;
                 paging; portable systems; postpass optimization;
                 scratchpad; victim cache; virtual memory",
}

@Article{Scholz:2008:MPB,
  author =       "Bernhard Scholz and Bernd Burgstaller and Jingling
                 Xue",
  title =        "Minimal placement of bank selection instructions for
                 partitioned memory architectures",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "12:1--12:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1331331.1331336",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We have devised an algorithm for minimal placement of
                 bank selections in partitioned memory architectures.
                 This algorithm is parameterizable for a chosen metric,
                 such as speed, space, or energy. Bank switching is a
                 technique that increases the code and data memory in
                 microcontrollers without extending the address buses.
                 Given a program in which variables have been assigned
                 to data banks, we present a novel optimization
                 technique that minimizes the overhead of bank switching
                 through cost-effective placement of bank selection
                 instructions. The placement is controlled by a number
                 of different objectives, such as runtime, low power,
                 small code size or a combination of these parameters.
                 We have formulated the minimal placement of bank
                 selection instructions as a discrete optimization
                 problem that is mapped to a partitioned Boolean
                 quadratic programming (PBQP) problem. We implemented
                 the optimization as part of a PIC Microchip backend and
                 evaluated the approach for several optimization
                 objectives. Our benchmark suite comprises programs from
                 MiBench and DSPStone plus a microcontroller real-time
                 kernel and drivers for microcontroller hardware
                 devices. Our optimization achieved a reduction in
                 program memory space of between 2.7 and 18.2\%, and an
                 overall improvement with respect to instruction cycles
                 between 5.0 and 28.8\%. Our optimization achieved the
                 minimal solution for all benchmark programs. We
                 investigated the scalability of our approach toward the
                 requirements of future generations of microcontrollers.
                 This study was conducted as a worst-case analysis on
                 the entire MiBench suite. Our results show that our
                 optimization (1) scales well to larger numbers of
                 memory banks, (2) scales well to the larger problem
                 sizes that will become feasible with future
                 microcontrollers, and (3) achieves minimal placement
                 for more than 72\% of all functions from MiBench.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "bank selection; partitioned Boolean quadratic
                 programming; partitioned memory architectures",
}

@article{Choi:2008:SHM,
  author = {Yoonseo Choi and Hwansoo Han},
  title = {Shared heap management for memory-limited {Java} virtual
    machines},
  journal = j-TECS,
  volume = 7,
  number = 2,
  pages = {13:1--13:??},
  month = feb,
  year = 2008,
  CODEN = {????},
  DOI = {http://doi.acm.org/10.1145/1331331.1331337},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {One scarce resource in embedded systems is memory.
    Multitasking makes the lack of memory problem even worse. Most
    current embedded systems, which do not provide virtual memory,
    simply divide physical memory and evenly assign contiguous memory
    chunks to multiple applications. Such simple memory management can
    frequently cause the lack of available memory for some applications,
    while others are not using the full amount of assigned memory. To
    overcome inefficiency in current memory management, we present an
    efficient heap management scheme that allows multiple applications
    to share heap space. To reduce overall heap memory usage,
    applications adaptively acquire subheaps out of shared pool of
    memory and release surplus subheaps to shared pool. As a result,
    applications see noncontiguous multiple subheaps as a heap in their
    address space. We target Java applications to implement our
    heap-sharing scheme in the KVM from Sun Microsystems. To protect
    fragmented heap space with a limited number of regions in memory
    protection unit (MPU), we maintain only a limited number of
    subheaps. We experimentally evaluate our heap management scheme with
    J2ME MIDP applications. Our static and dynamic schemes reduce heap
    memory usage, on average, by 30 and 27\%, respectively. For both
    schemes, overheads are kept low. The execution times in our schemes
    are increased only by 0.01\% for static scheme and 0.35\% for
    dynamic scheme, on average.},
  acknowledgement = ack-nhfb,
  articleno = {13},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {dynamic memory management; garbage collection; heap
    sharing; memory protection unit},
}

@article{So:2008:UHS,
  author = {Hayden Kwok-Hay So and Robert Brodersen},
  title = {A unified hardware\slash software runtime environment for
    {FPGA}-based reconfigurable computers using {BORPH}},
  journal = j-TECS,
  volume = 7,
  number = 2,
  pages = {14:1--14:??},
  month = feb,
  year = 2008,
  CODEN = {????},
  DOI = {http://doi.acm.org/10.1145/1331331.1331338},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This paper explores the design and implementation of BORPH,
    an operating system designed for FPGA-based reconfigurable
    computers. Hardware designs execute as normal UNIX processes under
    BORPH, having access to standard OS services, such as file system
    support. Hardware and software components of user designs may,
    therefore, run as communicating processes within BORPH's runtime
    environment. The familiar language independent UNIX kernel interface
    facilitates easy design reuse and rapid application development. To
    develop hardware designs, a Simulink-based design flow that
    integrates with BORPH is employed. Performances of BORPH on two
    on-chip systems implemented on a BEE2 platform are compared.},
  acknowledgement = ack-nhfb,
  articleno = {14},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {BORPH; FPGA; hardware process; reconfigurable computers},
}

@article{Caspi:2008:SPM,
  author = {Paul Caspi and Norman Scaife and Christos Sofronis and Stavros
    Tripakis},
  title = {Semantics-preserving multitask implementation of synchronous
    programs},
  journal = j-TECS,
  volume = 7,
  number = 2,
  pages = {15:1--15:??},
  month = feb,
  year = 2008,
  CODEN = {????},
  DOI = {http://doi.acm.org/10.1145/1331331.1331339},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {We study the implementation of a synchronous program as a
    set of multiple tasks running on the same computer, and scheduled by
    a real-time operating system using some preemptive scheduling
    policy, such as fixed priority or earliest-deadline first. Multitask
    implementations are necessary, for instance, in multiperiodic
    applications, when the worst-case execution time of the program is
    larger than its smallest period. In this case, a single-task
    implementation violates the schedulability assumption and,
    therefore, the synchrony hypothesis does not hold. We are aiming at
    semantics-preserving implementations, where, for a given input
    sequence, the output sequence produced by the implementation is the
    same as that produced by the original synchronous program, and this
    under all possible executions of the implementation. Straightforward
    implementation techniques are not semantics-preserving. We present
    an intertask communication protocol, called DBP, that is
    semantics-preserving and memory-optimal. DBP guarantees semantical
    preservation under all possible triggering patterns of the
    synchronous program: thus, it is applicable not only to time-, but
    also event-triggered applications. DBP works under both fixed
    priority and earliest-deadline first scheduling. DBP is a
    nonblocking protocol based on the use of intermediate buffers and
    manipulations of write-to/read-from pointers to these buffers: these
    manipulations happen upon arrivals, rather than executions of tasks,
    which is a distinguishing feature of DBP. DBP is memory-optimal in
    the sense that it uses as few buffers as needed, for any given
    triggering pattern. In the worst case, DBP requires, at most,
    $N + 2$ buffers for each writer, where $N$ is the number of readers
    for this writer.},
  acknowledgement = ack-nhfb,
  articleno = {15},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {embedded software; model-based design; optimality;
    preemptive scheduling; process communication; semantical
    preservation; synchronous programming},
}

@article{Liu:2008:HPP,
  author = {Duo Liu and Zheng Chen and Bei Hua and Nenghai Yu and Xinan
    Tang},
  title = {High-performance packet classification algorithm for
    multithreaded {IXP} network processor},
  journal = j-TECS,
  volume = 7,
  number = 2,
  pages = {16:1--16:??},
  month = feb,
  year = 2008,
  CODEN = {????},
  DOI = {http://doi.acm.org/10.1145/1331331.1331340},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Packet classification is crucial for the Internet to provide
    more value-added services and guaranteed quality of service. Besides
    hardware-based solutions, many software-based classification
    algorithms have been proposed. However, classifying at 10 Gbps speed
    or higher is a challenging problem and it is still one of the
    performance bottlenecks in core routers. In general, classification
    algorithms face the same challenge of balancing between high
    classification speed and low memory requirements. This paper
    proposes a modified recursive flow classification (RFC) algorithm,
    Bitmap-RFC, which significantly reduces the memory requirements of
    RFC by applying a bitmap compression technique. To speed up
    classifying speed, we exploit the multithreaded architectural
    features in various algorithm development stages from algorithm
    design to algorithm implementation. As a result, Bitmap-RFC strikes
    a good balance between speed and space. It can significantly keep
    both high classification speed and reduce memory space consumption.
    This paper investigates the main NPU software design aspects that
    have dramatic performance impacts on any NPU-based implementations:
    memory space reduction, instruction selection, data allocation, task
    partitioning, and latency hiding. We experiment with an
    architecture-aware design principle to guarantee the high
    performance of the classification algorithm on an NPU
    implementation. The experimental results show that the Bitmap-RFC
    algorithm achieves 10 Gbps speed or higher and has a good
    scalability on Intel IXP2800 NPU.},
  acknowledgement = ack-nhfb,
  articleno = {16},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {architecture; embedded system design; multithreading;
    network processor; packet classification; thread-level parallelism},
}

@Article{Zhuo:2008:EED,
  author =       "Jianli Zhuo and Chaitali Chakrabarti",
  title =        "Energy-efficient dynamic task scheduling algorithms
                 for {DVS} systems",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "17:1--17:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1331331.1331341",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dynamic voltage scaling (DVS) is a well-known
                 low-power design technique that reduces the processor
                 energy by slowing down the DVS processor and stretching
                 the task execution time. However, in a DVS system
                 consisting of a DVS processor and multiple devices,
                 slowing down the processor increases the device energy
                 consumption and thereby the system-level energy
                 consumption. In this paper, we first use system-level
                 energy consideration to derive the ``optimal'' scaling
                 factor by which a task should be scaled if there are no
                 deadline constraints. Next, we develop dynamic
                 task-scheduling algorithms that make use of dynamic
                 processor utilization and optimal scaling factor to
                 determine the speed setting of a task. We present
                 algorithm duEDF, which reduces the CPU energy
                 consumption and algorithm duSYS and its reduced
                 preemption version, duSYS\_PC, which reduce the
                 system-level energy. Experimental results on the
                 video-phone task set show that when the CPU power is
                 dominant, algorithm duEDF results in up to 45\% energy
                 savings compared to the non-DVS case. When the CPU
                 power and device power are comparable, algorithms duSYS
                 and duSYS\_PC achieve up to 25\% energy saving compared
                 to CPU energy-efficient algorithm duEDF, and up to 12\%
                 energy saving over the non-DVS scheduling algorithm.
                 However, if the device power is large compared to the
                 CPU power, then we show that a DVS scheme does not
                 result in lowest energy. Finally, a comparison of the
                 performance of algorithms duSYS and duSYS\_PC show that
                 preemption control has minimal effect on system-level
                 energy reduction.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "DVS system; dynamic task scheduling; energy
                 minimization; optimal scaling factor; real time",
}

@Article{Lee:2008:DFR,
  author          = {Sheayun Lee and Insik Shin and Woonseok Kim and Insup
                     Lee and Sang Lyul Min},
  title           = {A design framework for real-time embedded systems with
                     code size and energy constraints},
  journal         = j-TECS,
  volume          = {7},
  number          = {2},
  pages           = {18:1--18:??},
  month           = feb,
  year            = {2008},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1331331.1331342},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Real-time embedded systems are typically constrained
                     in terms of three system performance criteria: space,
                     time, and energy. The performance requirements are
                     directly translated into constraints imposed on the
                     system's resources, such as code size, execution time,
                     and energy consumption. These resource constraints
                     often interact or even conflict with each other in a
                     complex manner, making it difficult for a system
                     developer to apply a well-defined design methodology in
                     developing a real-time embedded system. Motivated by
                     this observation, we propose a design framework that
                     can flexibly balance the tradeoff involving the
                     system's code size, execution time, and energy
                     consumption. Given a system specification and an
                     optimization criteria, the proposed technique generates
                     a set of design parameters in such a way that a system
                     cost function is minimized while the given resource
                     constraints are satisfied. Specifically, the technique
                     derives code generation decision for each task so that
                     a specific version of code is selected among a number
                     of different ones that have distinct characteristics in
                     terms of code size and execution time. In addition, the
                     design framework determines the voltage/frequency
                     setting for a variable voltage processor whose supply
                     voltage can be adjusted at runtime in order to minimize
                     the energy consumption while execution performance is
                     degraded accordingly. The proposed technique formulates
                     this design process as a constrained optimization
                     problem. We show that this optimization problem is
                     NP-hard and then provide a heuristic solution to it. We
                     show that these seemingly conflicting design goals can
                     be pursued by using a simple optimization algorithm
                     that works with a single optimization criteria.
                     Moreover, the optimization is driven by an abstract
                     system specification given by the system developer, so
                     that the system development process can be automated.
                     The results from our simulation show that the proposed
                     algorithm finds a solution that is close to the optimal
                     one with the average error smaller than 1.0\%.},
  acknowledgement = ack-nhfb,
  articleno       = {18},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {code size; embedded; energy; real-time; scheduling},
}

@Article{Manolache:2008:TMP,
  author          = {Sorin Manolache and Petru Eles and Zebo Peng},
  title           = {Task mapping and priority assignment for soft
                     real-time applications under deadline miss ratio
                     constraints},
  journal         = j-TECS,
  volume          = {7},
  number          = {2},
  pages           = {19:1--19:??},
  month           = feb,
  year            = {2008},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1331331.1331343},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Both analysis and design optimisation of real-time
                     systems has predominantly concentrated on considering
                     hard real-time constraints. For a large class of
                     applications, however, this is both unrealistic and
                     leads to unnecessarily expensive implementations. This
                     paper addresses the problem of task priority assignment
                     and task mapping in the context of multiprocessor
                     applications with stochastic execution times and in the
                     presence of constraints on the percentage of missed
                     deadlines. We propose a design space exploration
                     strategy together with a fast method for system
                     performance analysis. Experiments emphasize the
                     efficiency of the proposed analysis method and
                     optimisation heuristic in generating high-quality
                     implementations of soft real-time systems with
                     stochastic task execution times and constraints on
                     deadline miss ratios.},
  acknowledgement = ack-nhfb,
  articleno       = {19},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {mapping; priority assignment; schedulability analysis;
                     soft real-time systems; stochastic task execution
                     times},
}

@Article{Park:2008:SRB,
  author          = {Taejoon Park and Kang G. Shin},
  title           = {Secure routing based on distributed key sharing in
                     large-scale sensor networks},
  journal         = j-TECS,
  volume          = {7},
  number          = {2},
  pages           = {20:1--20:??},
  month           = feb,
  year            = {2008},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1331331.1331344},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:00 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Sensor networks, usually built with a large number of
                     small, low-cost sensor nodes, are characterized by
                     their large-scale and unattended deployment,
                     necessitating ``secure'' communications between nearby,
                     as well as remote, sensor nodes for their intended
                     applications and services. Key setup/sharing is crucial
                     to the protection of such applications/services from
                     attacks, but existing (public-key, cluster-based, or
                     pairwise) solutions become too expensive (hence,
                     inefficient) when the underlying applications/services
                     require communications between distant sensor nodes. To
                     remedy this inefficiency, we propose a novel
                     distributed key-sharing scheme, in which each
                     participating sensor node shares unique keys with a
                     small number of other sensor nodes---called distributed
                     key servers (DKSs)---chosen according to their
                     geographic distance and communication direction. Using
                     DKSs, we develop two secure routing protocols: (1)
                     secure geographic forwarding that delivers packets by
                     using a chain of DKS lookups, each secured with its own
                     key and forwarded geographically; and (2) key
                     establishment that creates a secure session between two
                     distant sensor nodes based solely on symmetric-ciphers.
                     These protocols enable low-cost, low-power sensors to
                     provide high-level security at a very low cost.},
  acknowledgement = ack-nhfb,
  articleno       = {20},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {attack tolerance; distributed key sharing and servers;
                     key establishment; large-scale sensor networks; secure
                     geographic forwarding},
}

@Article{Cho:2008:DNP,
  author =       "Young H. Cho and William H. Mangione-Smith",
  title =        "Deep network packet filter design for reconfigurable
                 devices",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "21:1--21:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1331331.1331345",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most network routers and switches provide some
                 protection against the network attacks. However, the
                 rapidly increasing amount of damages reported over the
                 past few years indicates the urgent need for tougher
                 security. Deep-packet inspection is one of the
                 solutions to capture packets that can not be identified
                 using the traditional methods. It uses a list of
                 signatures to scan the entire content of the packet,
                 providing the means to filter harmful packets out of
                 the network. Since one signature does not depend on the
                 other, the filtering process has a high degree of
                 parallelism. Most software and hardware deep-packet
                 filters that are in use today execute the tasks under
                 Von Neumann architecture. Such architecture can not
                 fully take advantage of the parallelism. For instance,
                 one of the most widely used network intrusion-detection
                 systems, Snort, configured with 845 patterns, running
                 on a dual 1-GHz Pentium III system, can sustain a
                 throughput of only 50 Mbps. The poor performance is
                 because of the fact that the processor is programmed to
                 execute several tasks sequentially instead of
                 simultaneously. We designed scalable deep-packet
                 filters on field-programmable gate arrays (FPGAs) to
                 search for all data-independent patterns
                 simultaneously. With FPGAs, we have the ability to
                 reprogram the filter when there are any changes to the
                 signature set. The smallest full-pattern matcher
                 implementation for the latest Snort NIDS fits in a
                 single 400k Xilinx FPGA (Spartan 3-XC3S400) with a
                 sustained throughput of 1.6 Gbps. Given a larger FPGA,
                 the design can scale linearly to support a greater
                 number of patterns, as well as higher data
                 throughput.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "firewall; network intrusion detection; string filter;
                 virus; worm",
}

@Article{Pasricha:2008:FEB,
  author =       "Sudeep Pasricha and Nikil Dutt and Mohamed
                 Ben-Romdhane",
  title =        "Fast exploration of bus-based communication
                 architectures at the {CCATB} abstraction",
  journal =      j-TECS,
  volume =       "7",
  number =       "2",
  pages =        "22:1--22:??",
  month =        feb,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1331331.1331346",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:00 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Currently, system-on-chip (SoC) designs are becoming
                 increasingly complex, with more and more components
                 being integrated into a single SoC design.
                 Communication between these components is increasingly
                 dominating critical system paths and frequently becomes
                 the source of performance bottlenecks. It, therefore,
                 becomes imperative for designers to explore the
                 communication space early in the design flow.
                 Traditionally, system designers have used Pin-Accurate
                 Bus Cycle Accurate (PA-BCA) models for early
                 communication space exploration. These models capture
                 all of the bus signals and strictly maintain cycle
                 accuracy, which is useful for reliable performance
                 exploration but results in slow simulation speeds for
                 complex designs, even when they are modeled using
                 high-level languages. Recently, there have been several
                 efforts to use the Transaction-Level Modeling (TLM)
                 paradigm for improving simulation performance in BCA
                 models. However, these transaction-based BCA (T-BCA)
                 models capture a lot of details that can be eliminated
                 when exploring communication architectures. In this
                 paper, we extend the TLM approach and propose a new
                 transaction-based modeling abstraction level (CCATB) to
                 explore the communication design space. Our abstraction
                 level bridges the gap between the TLM and BCA levels,
                 and yields an average performance speedup of 120\% over
                 PA-BCA and 67\% over T-BCA models, on average. The
                 CCATB models are not only faster to simulate, but also
                 extremely accurate and take less time to model compared
                 to both T-BCA and PA-BCA models. We describe the
                 mechanisms that produce the speedup in CCATB models and
                 also analyze how the achieved simulation speedup scales
                 with design complexity. To demonstrate the
                 effectiveness of using CCATB for exploration, we
                 present communication space exploration case studies
                 from the broadband communication and multimedia
                 application domains.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "communication architecture; on-chip bus; performance
                 exploration; system-on-chip; transaction-level
                 modeling",
}

@Article{DiNatale:2008:BOM,
  author          = {Marco {Di Natale} and Valerio Pappalardo},
  title           = {Buffer optimization in multitask implementations of
                     {Simulink} models},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {23:1--23:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1347375.1347376},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Automatic generation of a controller implementation
                     from a synchronous reactive model is among the best
                     practices for software development in the automotive
                     and aeronautics industry, because of the possibility of
                     simulation, model checking, and error-free
                     implementation. This paper discusses an algorithm for
                     optimizing the single-processor multitask
                     implementation of Simulink models with real-time
                     execution constraints, derived from the sampling rates
                     of the functional blocks. Existing code generation
                     tools enforce the addition of extra buffering and
                     latencies whenever there is a rate transition among
                     functional blocks. This work shows how timing analysis
                     can be used to find the cases in which additional
                     buffering and latency can be avoided, improving the
                     space and time performance of the application. The
                     proposed search algorithm allows finding a solution
                     with reduced and possibly minimal use of buffering even
                     for very high values of processor utilization.},
  acknowledgement = ack-nhfb,
  articleno       = {23},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {code generation; real-time programming;
                     schedulability; software models},
}

@Article{Trajkovic:2008:ISA,
  author          = {Jelena Trajkovic and Alexander V. Veidenbaum and Arun
                     Kejariwal},
  title           = {Improving {SDRAM} access energy efficiency for
                     low-power embedded systems},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {24:1--24:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1347375.1347377},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {DRAM (dynamic random-access memory) energy consumption
                     in low-power embedded systems can be very high,
                     exceeding that of the data cache or even that of the
                     processor. This paper presents and evaluates a scheme
                     for reducing the energy consumption of SDRAM
                     (synchronous DRAM) memory access by a combination of
                     techniques that take advantage of SDRAM energy
                     efficiencies in bank and row access. This is achieved
                     by using small, cachelike structures in the memory
                     controller to prefetch an additional cache block(s) on
                     SDRAM reads and to combine block writes to the same
                     SDRAM row. The results quantify the SDRAM energy
                     consumption of MiBench applications and demonstrate
                     significant savings in SDRAM energy consumption, 23\%,
                     on average, and reduction in the energy-delay product,
                     44\%, on average. The approach also improves
                     performance: the CPI is reduced by 26\%, on average.},
  acknowledgement = ack-nhfb,
  articleno       = {24},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {embedded processors and low power; fetch buffer;
                     SDRAM; write-combining buffer},
}

@Article{Varma:2008:AFS,
  author =       "Ankush Varma and Eric Debes and Igor Kozintsev and
                 Paul Klein and Bruce Jacob",
  title =        "Accurate and fast system-level power modeling: an
                 {XScale}-based case study",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "25:1--25:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347378",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Accurate and fast system modeling is central to the
                 rapid design space exploration needed for
                 embedded-system design. With fast, complex SoCs playing
                 a central role in such systems, system designers have
                 come to require MIPS-range simulation speeds and
                 near-cycle accuracy. The sophisticated simulation
                 frameworks that have been developed for high-speed
                 system performance modeling do not address power
                 consumption, although it is a key design constraint. In
                 this paper, we define a simulation-based methodology
                 for extending system performance modeling frameworks to
                 also include power modeling. We demonstrate the use of
                 this methodology with a case study of a real, complex
                 embedded system, comprising the Intel XScale embedded
                 microprocessor, its WMMX SIMD co-processor, L1 caches,
                 SDRAM, and the on-board address and data buses. We
                 describe detailed power models for each of these
                 components and validate them against physical
                 measurements from hardware, demonstrating that such
                 frameworks enable designers to model both power and
                 performance at high speeds without sacrificing
                 accuracy. Our results indicate that the power estimates
                 obtained are accurate within 5\% of physical
                 measurements from hardware, while simulation speeds
                 consistently exceed a million instructions per second
                 (MIPS).",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "embedded systems; power modeling; SystemC",
}

@Article{Aamodt:2008:CTI,
  author =       "Tor M. Aamodt and Paul Chow",
  title =        "Compile-time and instruction-set methods for improving
                 floating- to fixed-point conversion accuracy",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "26:1--26:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347379",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This paper proposes and evaluates compile time and
                 instruction-set techniques for improving the accuracy
                 of signal-processing algorithms run on fixed-point
                 embedded processors. These techniques are proposed in
                 the context of a profile guided floating- to
                 fixed-point compiler-based conversion process. A novel
                 fixed-point scaling algorithm (IRP) is introduced that
                 exploits correlations between values in a program by
                 applying fixed-point scaling, retaining as much
                 precision as possible without causing overflow. This
                 approach is extended into a more aggressive scaling
                 algorithm (IRP-SA) by leveraging the modulo nature of
                 2's complement addition and subtraction to discard most
                 significant bits that may not be redundant
                 sign-extension bits. A complementary scaling technique
                 (IDS) is then proposed that enables the fixed-point
                 scaling of a variable to be parameterized, depending
                 upon the context of its definitions and uses. Finally,
                 a novel instruction-set enhancement---fractional
                 multiplication with internal left shift (FMLS)---is
                 proposed to further leverage interoperand correlations
                 uncovered by the IRP-SA scaling algorithm. FMLS
                 preserves a different subset of the full product's bits
                 than traditional fractional fixed-point or integer
                 multiplication. On average, FMLS combined with IRP-SA
                 improves accuracy on processors with uniform bitwidth
                 register architectures by the equivalent of 0.61 bits
                 of additional precision for a set of signal-processing
                 benchmarks (up to 2 bits). Even without employing FMLS,
                 the IRP-SA scaling algorithm achieves additional
                 accuracy over two previous fixed-point scaling
                 algorithms by averages of 1.71 and 0.49 bits.
                 Furthermore, as FMLS combines multiplication with a
                 scaling shift, it reduces execution time by an average
                 of 9.8\%. An implementation of IDS, specialized to
                 single-nested loops, is found to improve accuracy of a
                 lattice filter benchmark by the equivalent of more than
                 16-bits of precision.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "compilation; digital signal processing; fixed-point;
                 fractional multiplication; scaling; signal-to-noise
                 ratio",
}

@Article{Fei:2008:EAF,
  author          = {Yunsi Fei and Lin Zhong and Niraj K. Jha},
  title           = {An energy-aware framework for dynamic software
                     management in mobile computing systems},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {27:1--27:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1347375.1347380},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Energy efficiency has become a very important and
                     challenging issue for resource-constrained mobile
                     computers. In this article, we propose a novel dynamic
                     software management (DSOM) framework to improve battery
                     utilization. We have designed and implemented a DSOM
                     module in user space, independent of the operating
                     system (OS), which explores quality-of-service (QoS)
                     adaptation to reduce system energy and employs a
                     priority-based preemption policy for multiple
                     applications to avoid competition for limited energy
                     resources. Software energy macromodels for mobile
                     applications are employed to predict energy demand at
                     each QoS level, so that the DSOM module is able to
                     select the best possible trade-off between energy
                     conservation and application QoS; it also honors the
                     priority desired by the user. Our experimental results
                     for some mobile applications (video player, speech
                     recognizer, voice-over-IP) show that this approach can
                     meet user-specified task-oriented goals and
                     significantly improve battery utilization.},
  acknowledgement = ack-nhfb,
  articleno       = {27},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {energy macromodel; runtime coordination; software
                     adaptation},
}

@Article{Zhong:2008:SWE,
  author          = {Xiliang Zhong and Cheng-Zhong Xu},
  title           = {System-wide energy minimization for real-time tasks:
                     {Lower} bound and approximation},
  journal         = j-TECS,
  volume          = {7},
  number          = {3},
  pages           = {28:1--28:??},
  month           = apr,
  year            = {2008},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1347375.1347381},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Thu Jun 12 15:22:21 MDT 2008},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {We present a dynamic voltage scaling (DVS) technique
                     that minimizes system-wide energy consumption for both
                     periodic and sporadic tasks. It is known that a system
                     consists of processors and a number of other
                     components. Energy-aware processors can be run in
                     different speed levels; components like memory and I/O
                     subsystems and network interface cards can be in a
                     standby state when they are active, but idle. Processor
                     energy optimization solutions are not necessarily
                     efficient from the perspective of systems. Current
                     system-wide energy optimization studies are often
                     limited to periodic tasks with heuristics in getting
                     approximated solutions. In this paper, we develop an
                     exact dynamic programming algorithm for periodic tasks
                     on processors with practical discrete speed levels. The
                     algorithm determines the lower bound of energy
                     expenditure in pseudopolynomial time. An approximation
                     algorithm is proposed to provide performance guarantee
                     with a given bound in polynomial running time. Because
                     of their time efficiency, both the optimization and
                     approximation algorithms can be adapted for online
                     scheduling of sporadic tasks with irregular task
                     releases. We prove that system-wide energy optimization
                     for sporadic tasks is NP-hard in the strong sense. We
                     develop (pseudo-) polynomial-time solutions by
                     exploiting its inherent properties.},
  acknowledgement = ack-nhfb,
  articleno       = {28},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {dynamic power management; dynamic voltage scaling;
                     power-aware scheduling; real-time systems},
}

@Article{Zhou:2008:CIA,
  author =       "Ye Zhou and Edward A. Lee",
  title =        "Causality interfaces for actor networks",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "29:1--29:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347382",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We consider concurrent models of computation where
                 ``actors'' (components that are in charge of their own
                 actions) communicate by exchanging messages. The
                 interfaces of actors principally consist of ``ports,''
                 which mediate the exchange of messages. Actor-oriented
                 architectures contrast with and complement
                 object-oriented models by emphasizing the exchange of
                 data between concurrent components rather than
                 transformation of state. Examples of such models of
                 computation include the classical actor model,
                 synchronous languages, data-flow models, process
                 networks, and discrete-event models. Many experimental
                 and production languages used to design embedded
                 systems are actor oriented and based on one of these
                 models of computation. Many of these models of
                 computation benefit considerably from having access to
                 causality information about the components. This paper
                 augments the interfaces of such components to include
                 such causality information. It shows how this causality
                 information can be algebraically composed so that
                 compositions of components acquire causality interfaces
                 that are inferred from their components and the
                 interconnections. We illustrate the use of these
                 causality interfaces to statically analyze timed models
                 and synchronous language compositions for causality
                 loops and data-flow models for deadlock. We also show
                 that causality analysis for each communication cycle
                 can be performed independently and in parallel, and it
                 is only necessary to analyze one port for each cycle.
                 Finally, we give a conservative approximation technique
                 for handling dynamically changing causality
                 properties.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "actors; behavioral types; causality; data flow;
                 deadlock; discrete-event models; interfaces;
                 synchronous languages; timed systems",
}

@Article{Shin:2008:CRT,
  author =       "Insik Shin and Insup Lee",
  title =        "Compositional real-time scheduling framework with
                 periodic model",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "30:1--30:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347383",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "It is desirable to develop large complex systems using
                 components based on systematic abstraction and
                 composition. Our goal is to develop a compositional
                 real-time scheduling framework to support abstraction
                 and composition techniques for real-time aspects of
                 components. In this paper, we present a formal
                 description of compositional real-time scheduling
                 problems, which are the component abstraction and
                 composition problems. We identify issues that need be
                 addressed by solutions and provide our framework for
                 the solutions, which is based on the periodic
                 interface. Specifically, we introduce the periodic
                 resource model to characterize resource allocations
                 provided to a single component. We present exact
                 schedulability conditions for the standard Liu and
                 Layland periodic task model and the proposed periodic
                 resource model under EDF and RM scheduling, and we show
                 that the component abstraction and composition problems
                 can be addressed with periodic interfaces through the
                 exact schedulability conditions. We also provide the
                 utilization bounds of a periodic task set over the
                 periodic resource model and the abstraction bounds of
                 periodic interfaces for a periodic task set under EDF
                 and RM scheduling. We finally present the analytical
                 bounds of overheads that our solution incurs in terms
                 of resource utilization increase and evaluate the
                 overheads through simulations.",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "abstract; component; composition; hierarchical;
                 interface; real-time; scheduling",
}

@Article{Voyiatzis:2008:SFS,
  author =       "Artemios G. Voyiatzis and Dimitrios N. Serpanos",
  title =        "The security of the {Fiat--Shamir} scheme in the
                 presence of transient hardware faults",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347384",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Implementation cryptanalysis has emerged as a
                 realistic threat for cryptographic systems. It consists
                 of two classes of attacks: fault-injection and
                 side-channel attacks. In this work, we examine the
                 resistance of the Fiat--Shamir scheme to
                 fault-injection attacks, since Fiat--Shamir is a
                 popular scheme for ``light'' consumer devices, such as
                 smartcards, in a wide range of consumer services. We
                 prove that an existing attack, known as the Bellcore
                 attack, is incomplete. We propose an extension to the
                 protocol that proactively secures Fiat--Shamir systems
                 from the Bellcore attack and we prove its strength.
                 Finally, we introduce a new attack model, which, under
                 stronger assumptions, can derive the secret keys from
                 both the original Fiat--Shamir scheme as well as its
                 proposed extension. Our approach demonstrates that
                 countermeasures for implementation cryptanalysis must
                 be carefully designed and that deployed systems must
                 include appropriate protection mechanisms for all known
                 attacks and be flexible enough to incorporate
                 countermeasures for new ones.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Bellcore attack; cryptography; Fiat--Shamir
                 identification scheme; side-channel attacks;
                 smartcards",
}

@Article{Gurun:2008:NGP,
  author =       "Selim Gurun and Chandra Krintz and Rich Wolski",
  title =        "{NWSLite}: a general-purpose, nonparametric
                 prediction utility for embedded systems",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "32:1--32:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347385",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Time series-based prediction methods have a wide range
                 of uses in embedded systems. Many OS algorithms and
                 applications require accurate prediction of demand and
                 supply of resources. However, configuring prediction
                 algorithms is not easy, since the dynamics of the
                 underlying data requires continuous observation of the
                 prediction error and dynamic adaptation of the
                 parameters to achieve high accuracy. Current prediction
                 methods are either too costly to implement on
                 resource-constrained devices or their parameterization
                 is static, making them inappropriate and inaccurate for
                 a wide range of datasets. This paper presents NWSLite,
                 a prediction utility that addresses these shortcomings
                 on resource-restricted platforms.",
  acknowledgement = ack-nhfb,
  articleno =    "32",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "computation offloading; CPU availability estimation;
                 embedded systems; network performance estimation;
                 prediction algorithms",
}

@Article{Yan:2008:DOD,
  author =       "Ting Yan and Yu Gu and Tian He and John A.
                 Stankovic",
  title =        "Design and optimization of distributed sensing
                 coverage in wireless sensor networks",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347386",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "For many sensor network applications, such as military
                 surveillance, it is necessary to provide full sensing
                 coverage to a security-sensitive area while, at the
                 same time, minimizing energy consumption and extending
                 system lifetime by leveraging the redundant deployment
                 of sensor nodes. In this paper, we propose a
                 surveillance service for sensor networks based on a
                 distributed energy-efficient sensing coverage protocol.
                 In the protocol, each node is able to dynamically
                 decide a schedule for itself to guarantee a certain
                 degree-of-coverage (DOC) with average energy
                 consumption inversely proportional to the node density.
                 Several optimizations and extensions are proposed to
                 enhance the basic design with a better load-balance
                 feature and a longer network lifetime. We consider and
                 address the impact of the target size and the
                 unbalanced initial energy capacity of individual nodes
                 to the network lifetime. Several practical issues such
                 as the localization error, irregular sensing range, and
                 unreliable communication links are addressed as well.
                 Simulation shows that our protocol extends system
                 lifetime significantly with low energy consumption. It
                 outperforms other state-of-the-art schemes by as much
                 as 50\% reduction in energy consumption and as much as
                 130\% increase in the half-life of the network.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "energy conservation; sensing coverage; sensor
                 networks",
}

@Article{Ozer:2008:SBE,
  author =       "Emre {\"O}zer and Andy P. Nisbet and David Gregg",
  title =        "A stochastic bitwidth estimation technique for compact
                 and low-power custom processors",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "34:1--34:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347387",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "There is an increasing trend toward compiling from C
                 to custom hardware for designing embedded systems in
                 which the area and power consumption of
                 application-specific functional units, registers, and
                 memory blocks are heavily dependent on the bit-widths
                 of integer operands used in computations. The actual
                 bit-width required to store the values assigned to an
                 integer variable during the execution of a program will
                 not, in general, match the built-in C data types. Thus,
                 precious area is wasted if the built-in data type sizes
                 are used to declare the size of integer operands. In
                 this paper, we introduce stochastic bit-width
                 estimation that follows a simulation-based
                 probabilistic approach to estimate the bit-widths of
                 integer variables using extreme value theory. The
                 estimation technique is also empirically compared to
                 two compile-time integer bit-width analysis techniques.
                 Our experimental results show that the stochastic
                 bit-width estimation technique dramatically reduces
                 integer bit-widths and, therefore, enables more compact
                 and power-efficient custom hardware designs than the
                 compile-time integer bit-width analysis techniques. Up
                 to 37\% reduction in custom hardware area and 30\%
                 reduction in logic power consumption using stochastic
                 bit-width estimation can be attained over ten integer
                 applications implemented on an FPGA chip.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "bit-width analysis; custom hardware; extreme value
                 theory; FPGA; statistical estimation",
}

@Article{Kumar:2008:CCP,
  author =       "Rajeev Kumar and Dipankar Das",
  title =        "Code compression for performance enhancement of
                 variable-length embedded processors",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347388",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most of the work done in the field of code compression
                 pertains to processors with fixed-length instruction
                 encoding. The design of a code-compression scheme for
                 variable-length instruction encodings poses newer
                 design challenges. In this work, we first investigate
                 the scope for code compression on variable-length
                 instruction-set processors whose encodings are already
                 optimized to a certain extent with respect to their
                 usage. For such ISAs instruction boundaries are not
                 known prior to decoding. Another challenging task of
                 designing a code-compression scheme for such ISAs is
                 designing the decompression hardware, which must
                 decompress code postcache so that we gain in
                 performance. We present two dictionary-based code
                 compression schemes. The first algorithm uses a
                 bit-vector; the second one uses reserved instructions
                 to identify code words. We design additional logic for
                 each of the schemes to decompress the code on-the-fly.
                 We test the two algorithms with a variable-length RISC
                 processor. We provide a detailed experimental analysis
                 of the empirical results obtained by extensive
                 simulation-based design space exploration for this
                 system. The optimized decompressor can now execute
                 compressed program faster than the native program. The
                 experiments demonstrate reduction in code size (up to
                 30\%), speed-up (up to 15\%), and bus-switching
                 activity (up to 20\%). We also implement one
                 decompressor in a hardware description language and
                 synthesize it to illustrate the small overheads
                 associated with the proposed approach.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "bus switching; code compression; code decompression;
                 embedded systems; instruction memory; RISC processor;
                 variable-length ISAs",
}

@Article{Wilhelm:2008:WCE,
  author =       "Reinhard Wilhelm and Jakob Engblom and Andreas
                 Ermedahl and Niklas Holsti and Stephan Thesing and
                 David Whalley and Guillem Bernat and Christian
                 Ferdinand and Reinhold Heckmann and Tulika Mitra and
                 Frank Mueller and Isabelle Puaut and Peter Puschner and
                 Jan Staschulat and Per Stenstr{\"o}m",
  title =        "The worst-case execution-time problem---overview of
                 methods and survey of tools",
  journal =      j-TECS,
  volume =       "7",
  number =       "3",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1347375.1347389",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 12 15:22:21 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The determination of upper bounds on execution times,
                 commonly called worst-case execution times (WCETs), is
                 a necessary step in the development and validation
                 process for hard real-time systems. This problem is
                 hard if the underlying processor architecture has
                 components, such as caches, pipelines, branch
                 prediction, and other speculative components. This
                 article describes different approaches to this problem
                 and surveys several commercially available tools and
                 research prototypes.",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "hard real time; worst-case execution times",
}

@Article{Hessell:2008:EES,
  author =       "Fabiano Hessell and Kenneth Kent and Dionisios
                 Pnevmatikatos",
  title =        "Editorial: {Embedded} systems---new challenges and
                 future directions",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "37:1--37:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376805",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Park:2008:RFF,
  author =       "Chanik Park and Wonmoon Cheon and Jeonguk Kang and
                 Kangho Roh and Wonhee Cho and Jin-Soo Kim",
  title =        "A reconfigurable {FTL} (flash translation layer)
                 architecture for {NAND} flash-based applications",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "38:1--38:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376806",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, a novel FTL (flash translation layer)
                 architecture is proposed for NAND flash-based
                 applications such as MP3 players, DSCs (digital still
                 cameras) and SSDs (solid-state drives). Although the
                 basic function of an FTL is to translate a logical
                 sector address to a physical sector address in flash
                 memory, efficient algorithms of an FTL have a
                 significant impact on performance as well as the
                 lifetime. After the dominant parameters that affect the
                 performance and endurance are categorized, the design
                 space of the FTL architecture is explored based on a
                 diverse workload analysis. With the proposed FTL
                 architectural framework, it is possible to decide which
                 configuration of FTL mapping parameters yields the best
                 performance, depending on the differing characteristics
                 of various NAND flash-based applications.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "flash memory; FTL; performance analysis;
                 reconfigurable architecture",
}

@Article{Popovici:2008:PBS,
  author =       "Katalin Popovici and Xavier Guerin and Frederic
                 Rousseau and Pier Stanislao Paolucci and Ahmed Amine
                 Jerraya",
  title =        "Platform-based software design flow for heterogeneous
                 {MPSoC}",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "39:1--39:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376807",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Current multimedia applications demand complex
                 heterogeneous multiprocessor architectures with
                 specific communication infrastructure in order to
                 achieve the required performances. Programming these
                 architectures usually results in writing separate
                 low-level code for the different processors (DSP,
                 microcontroller), implying late global validation of
                 the overall application with the hardware platform. We
                 propose a platform-based software design flow able to
                 efficiently use the resources of the architecture and
                 allowing easy experimentation of several mappings of
                 the application onto the platform resources. We use a
                 high-level environment to capture both application and
                 architecture initial representations. An executable
                 software stack is generated automatically for each
                 processor from the initial model. The software
                 generation and validation is performed gradually
                 corresponding to different software abstraction levels.
                 Specific software development platforms (abstract
                 models of the architecture) are generated and used to
                 allow debugging of the different software components
                 with explicit hardware-software interaction. We applied
                 this approach on a multimedia platform, involving a
                 high performance DSP and a RISC processor, to explore
                 communication architecture and generate an efficient
                 executable code for a multimedia application. Based on
                 automatic tools, the proposed flow increases
                 productivity and preserves design quality.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "multimedia; multiprocessor system-on chip; programming
                 environment; Simulink; software design; SystemC;
                 transaction level modeling",
}

@Article{Chattopadhyay:2008:PPA,
  author =       "A. Chattopadhyay and H. Ishebabi and X. Chen and Z.
                 Rakosi and K. Karuri and D. Kammler and R. Leupers and
                 G. Ascheid and H. Meyr",
  title =        "Prefabrication and postfabrication architecture
                 exploration for partially reconfigurable {VLIW}
                 processors",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "40:1--40:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376808",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Modern application-specific instruction-set processors
                 (ASIPs) face the daunting task of delivering high
                 performance for a wide range of applications. For
                 enhancing the performance, architectural features, for
                 example, pipelining, VLIW, are often employed in ASIPs,
                 leading to high design complexity. Integrated ASIP
                 design environments, like template-based approaches and
                 language-driven approaches, provide an answer to this
                 growing design complexity. At the same time, increasing
                 hardware design costs have motivated the processor
                 designers to introduce high flexibility in the
                 processor. Flexibility, in its most effective form, can
                 be introduced to the ASIP by coupling a reconfigurable
                 unit to the base processor. Because of its obvious
                 benefits, several reconfigurable ASIPs (rASIPs) have
                 been designed for years. This design paradigm gained
                 momentum with the advent of coarse-grained FPGAs, where
                 the lack of domain-specific performance common in
                 general-purpose FPGAs are largely overcome by choosing
                 application-dependent basic functional units. These
                 rASIP designs lack a generic flow from high-level
                 specification, resulting in intuitive design decisions
                 and hard-to-retarget processor design tools. Although
                 partial, template-based approaches for rASIP design is
                 existent, a clear design methodology especially for the
                 prefabrication architecture exploration is not present.
                 In order to address this issue, a high-level
                 specification and design methodology for partially
                 reconfigurable VLIW processors is proposed in this
                 article. To show the benefit of this approach, a
                 commercial VLIW processor is used as the base
                 architecture and two domains of applications are
                 studied for potential performance gain.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "ASIP; coarse-grained FPGA; VLIW",
}

@Article{Lin:2008:MAC,
  author =       "Yi-Neng Lin and Ying-Dar Lin and Yuan-Cheng Lai and
                 Kuo-Kun Tseng",
  title =        "Modeling and analysis of core-centric network
                 processors",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "41:1--41:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376809",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Network processors can be categorized into two types,
                 the coprocessors-centric model in which data-plane is
                 handled by coprocessors, and the core-centric model in
                 which the core processes most of the data-plane packets
                 yet offloading some tasks to coprocessors. While the
                 former has been properly explored over various
                 applications, research regarding the latter remain
                 limited. Based on the previous experience of
                 prototyping the virtual private network (VPN) over the
                 IXP425 network processor, this work aims to derive
                 design implications for the core-centric model
                 performing computational intensive applications. From
                 system and IC vendors' perspectives, the
                 continuous-time Markov chain and Petri net simulations
                 are adopted to explore this architecture. Analytical
                 results prove to be quite inline with those of the
                 simulation and implementation. With subsequent
                 investigation, we find that appropriate process run
                 lengths can improve the effective core utilization by
                 2.26 times, and by offloading the throughput boosts 7.5
                 times. The results also suggest single-process
                 programming, since context-switch overhead impacts
                 considerably on the performance.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "core-centric; embedded system; modeling; network
                 processor; simulation",
}

%%% TECS 7(4), July 2008, article 42 (Ocarina AADL tool suite).
%%% The citation label keeps its historical `Get' prefix so that
%%% existing citations of this entry continue to resolve; the first
%%% author's surname is Hugues.
@Article{Get:2008:PFE,
  author =       "Jerome Hugues and Bechir Zalila and Laurent Pautet and
                 Fabrice Kordon",
  title =        "From the prototype to the final embedded system using
                 the {Ocarina AADL} tool suite",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "42:1--42:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376810",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Building distributed real-time embedded systems
                 requires a stringent methodology, from early
                 requirement capture to full implementation. However,
                 there is a strong link between the requirements and the
                 final implementation (e.g., scheduling and resource
                 dimensioning). Therefore, a rapid prototyping process
                 based on automation of tedious and error-prone tasks
                 (analysis and code generation) is required to speed up
                 the development cycle. In this article, we show how the
                 AADL ({\em Architecture Analysis and Design
                 Language\/}), which appeared in late 2004, helps solve
                 these issues thanks to a dedicated tool suite. We then
                 detail the prototyping process and its current
                 implementation: Ocarina.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "AADL; distributed; DRE; embedded; Ocarina; PolyORB-HI;
                 real-time",
}

%%% TECS 7(4), July 2008, article 43: tag-based compositional theory
%%% of heterogeneous reactive systems.
@Article{Benveniste:2008:CHR,
  author =       "Albert Benveniste and Beno{\^\i}t Caillaud and Luca P.
                 Carloni and Paul Caspi and Alberto
                 L. Sangiovanni-Vincentelli",
  title =        "Composing heterogeneous reactive systems",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "43:1--43:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376811",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present a compositional theory of heterogeneous
                 reactive systems. The approach is based on the concept
                 of tags marking the events of the signals of a system.
                 Tags can be used for multiple purposes from indexing
                 evolution in time (time stamping) to expressing
                 relations among signals, like coordination (e.g.,
                 synchrony and asynchrony) and causal dependencies. The
                 theory provides flexibility in system modeling because
                 it can be used both as a unifying mathematical
                 framework to relate heterogeneous models of
                 computations and as a formal vehicle to implement
                 complex systems by combining heterogeneous components.
                 In particular, we introduce an algebra of tag
                 structures to define heterogeneous parallel composition
                 formally. Morphisms between tag structures are used to
                 define relationships between heterogeneous models at
                 different levels of abstraction. In particular, they
                 can be used to represent design transformations from
                 tightly synchronized specifications to
                 loosely-synchronized implementations. The theory has an
                 important application in the correct-by-construction
                 deployment of synchronous design on distributed
                 architectures.",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Compositionality; correct-by-construction design;
                 GALS; models of computation; reactive systems",
}

%%% TECS 7(4), July 2008, article 44: electromagnetic (EM) differential
%%% side-channel analysis of a wireless Java-based PDA.
@Article{Gebotys:2008:EAW,
  author =       "Catherine H. Gebotys and Brian A. White",
  title =        "{EM} analysis of a wireless {Java}-based {PDA}",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376812",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The susceptibility of wireless portable devices to
                 electromagnetic (EM) attacks is largely unknown. If
                 analysis of electromagnetic (EM) waves emanating from
                 the wireless device during a cryptographic computation
                 do leak sufficient information, it may be possible for
                 an attacker to reconstruct the secret key. Possession
                 of the secret cryptographic key would render all future
                 wireless communications insecure and cause further
                 potential problems, such as identity theft. Despite the
                 complexities of a PDA wireless device, such as
                 operating system events, interrupts, cache misses, and
                 other interfering events, this article demonstrates
                 that, for the first time, repeatable EM differential
                 attacks are possible. The proposed differential
                 analysis methodology involves precharacterization of
                 the PDA device (thresholding and pattern recognition),
                 and a new frequency-based differential analysis. Unlike
                 previous research, the new methodology does not require
                 perfect alignment of EM frames and is repeatable in the
                 presence of a complex embedded system (including cache
                 misses, operating system events, etc), thus supporting
                 attacks on real embedded systems. This research is
                 important for future wireless embedded systems, which
                 will increasingly demand higher levels of security.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "countermeasures; EM analysis; power attacks;
                 Side-channel analysis",
}

%%% TECS 7(4), July 2008, article 45: fault-tolerance for real-time
%%% tasks via automatic program transformations (heartbeating,
%%% checkpointing, and rollback).
@Article{Ayav:2008:IFT,
  author =       "Tolga Ayav and Pascal Fradet and Alain Girault",
  title =        "Implementing fault-tolerance in real-time programs by
                 automatic program transformations",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "45:1--45:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376813",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present a formal approach to implement
                 fault-tolerance in real-time embedded systems. The
                 initial fault-intolerant system consists of a set of
                 independent periodic tasks scheduled onto a set of
                 fail-silent processors connected by a reliable
                 communication network. We transform the tasks such
                 that, assuming the availability of an additional spare
                 processor, the system tolerates one failure at a time
                 (transient or permanent). Failure detection is
                 implemented using heartbeating, and failure masking
                 using checkpointing and rollback. These techniques are
                 described and implemented by automatic program
                 transformations on the tasks' programs. The proposed
                 formal approach to fault-tolerance by program
                 transformations highlights the benefits of separation
                 of concerns. It allows us to establish correctness
                 properties and to compute optimal values of parameters
                 to minimize fault-tolerance overhead. We also present
                 an implementation of our method, to demonstrate its
                 feasibility and its efficiency.",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "checkpointing; correctness proofs; Fault-tolerance;
                 heartbeating; program transformations",
}

%%% TECS 7(4), July 2008, article 46: MTSS, a multitask stack sharing
%%% technique against out-of-memory errors in embedded systems.
@Article{Middha:2008:MMS,
  author =       "Bhuvan Middha and Matthew Simpson and Rajeev Barua",
  title =        "{MTSS}: {Multitask} stack sharing for embedded
                 systems",
  journal =      j-TECS,
  volume =       "7",
  number =       "4",
  pages =        "46:1--46:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1376804.1376814",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Aug 5 19:32:59 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Out-of-memory errors are a serious source of
                 unreliability in most embedded systems. Applications
                 run out of main memory because of the frequent
                 difficulty of estimating the memory requirement before
                 deployment, either because it depends on input data, or
                 because certain language features prevent estimation.
                 The typical lack of disks and virtual memory in
                 embedded systems has a serious consequence when an
                 out-of-memory error occurs. Without swap space, the
                 system crashes if its memory footprint exceeds the
                 available memory by even 1 byte. This work improves
                 reliability for multitasking embedded systems by
                 proposing MTSS, a multitask stack sharing technique. If
                 a task attempts to overflow the bounds of its allocated
                 stack space, MTSS grows its stack into the stack memory
                 space allocated for other tasks. This technique can
                 avoid the out-of-memory error if the extra space
                 recovered is sufficient to complete execution.
                 Experiments show that MTSS is able to recover an
                 average of 54\% of the stack space allocated to the
                 overflowing task in the free space of other tasks. In
                 addition, unlike conventional systems, MTSS detects
                 memory overflows, allowing the possibility of remedial
                 action or a graceful exit if the recovered space is not
                 enough. Alternatively, MTSS can be used for decreasing
                 the required physical memory of an embedded system by
                 reducing the initial memory allocated to each of the
                 tasks and recovering the deficit by sharing stack with
                 other tasks. The overheads of MTSS are low: the runtime
                 and energy overheads are 3.1\% and 3.2\%, on average.
                 These are tolerable given that reliability is the most
                 important concern in virtually all systems, ahead of
                 other concerns, such as runtime and energy.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "cactus stack; data compression; heap overflow; meshed
                 stack; Out-of-memory errors; reliability; reuse;
                 runtime checks; stack overflow; virtual memory",
}

%%% TECS 8(1), December 2008, article 1: FIDES, a secure chip
%%% multiprocessor platform for mobile terminals.
@Article{Inoue:2008:FAC,
  author =       "Hiroaki Inoue and Junji Sakai and Sunao Torii and
                 Masato Edahiro",
  title =        "{FIDES}: an advanced chip multiprocessor platform
                 for secure next generation mobile terminals",
  journal =      j-TECS,
  volume =       "8",
  number =       "1",
  pages =        "1:1--1:??",
  month =        dec,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1457246.1457247",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 6 14:36:01 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We propose a secure platform on a chip multiprocessor,
                 FIDES, in order to enable next generation mobile
                 terminals to execute downloaded native applications for
                 Linux. Its most important feature is the higher
                 security based on multigrained separation mechanisms.
                 Four new technologies support the FIDES platform: bus
                 filter logic, XIP kernels, policy separation, and
                 dynamic access control. With these technologies, the
                 FIDES platform can tolerate both application-level and
                 kernel-level bugs on an actual download subsystem.
                 Thus, the best-suited platform to secure next
                 generation mobile terminals is FIDES.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "chip multiprocessor; Secure mobile terminal; SELinux",
}

%%% TECS 8(1), December 2008, article 2: VeIL, an attack-tolerant
%%% localization protocol for sensor networks.
@Article{Park:2008:ATL,
  author =       "Taejoon Park and Kang G. Shin",
  title =        "Attack-tolerant localization via iterative
                 verification of locations in sensor networks",
  journal =      j-TECS,
  volume =       "8",
  number =       "1",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1457246.1457248",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 6 14:36:01 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In sensor networks, secure localization ---
                 determining sensors' locations in a hostile, untrusted
                 environment --- is a challenging, but very important,
                 problem that has not yet been addressed effectively.
                 This paper presents an attack-tolerant localization
                 protocol, called {\em Verification for Iterative
                 Localization\/} (VeIL), under which sensors
                 cooperatively safeguard the localization service. By
                 exploiting the high spatiotemporal correlation existing
                 between adjacent nodes, VeIL realizes (a) adaptive
                 management of a profile for normal localization
                 behavior, and (b) distributed detection of false
                 locations advertised by attackers by comparing them
                 against the profile of normal behavior. Our analysis
                 and simulation results show that VeIL achieves
                 high-level tolerance to many critical attacks, and is
                 computationally feasible on resource-limited sensors.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Anomaly detection; attack-tolerance; localization;
                 recursive least squares; sensor networks",
}

%%% TECS 8(1), December 2008, article 3: verification of average dwell
%%% time (ADT) properties of hybrid systems.  The ADT symbol in the
%%% abstract is set entirely in math mode, because \tau is a math-only
%%% macro and must carry its own subscript.
@Article{Mitra:2008:VAD,
  author =       "Sayan Mitra and Daniel Liberzon and Nancy Lynch",
  title =        "Verifying average dwell time of hybrid systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1457246.1457249",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 6 14:36:01 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Average dwell time (ADT) properties characterize the
                 rate at which a hybrid system performs mode switches.
                 In this article, we present a set of techniques for
                 verifying ADT properties. The stability of a hybrid
                 system A can be verified by combining these techniques
                 with standard methods for checking stability of the
                 individual modes of A.\par

                 We introduce a new type of simulation relation for
                 hybrid automata --- {\em switching simulation\/} ---
                 for establishing that a given automaton A switches more
                 rapidly than another automaton B. We show that the
                 question of whether a given hybrid automaton has ADT
                 $ \tau_a $ can be answered either by checking
                 an invariant or by solving an optimization problem. For
                 classes of hybrid automata for which invariants can be
                 checked automatically, the invariant-based method
                 yields an automatic method for verifying ADT; for
                 automata that are outside this class, the invariant has
                 to be checked using inductive techniques. The
                 optimization-based method is automatic and is
                 applicable to a restricted class of initialized hybrid
                 automata. A solution of the optimization problem either
                 gives a counterexample execution that violates the ADT
                 property, or it confirms that the automaton indeed
                 satisfies the property. The optimization and the
                 invariant-based methods can be used in combination to
                 find the unknown ADT of a given hybrid automaton.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Hybrid systems; optimization-based verification;
                 simulation relation",
}

%%% TECS 8(1), December 2008, article 4: speed/accuracy trade-off in
%%% transaction level modeling (TLM), studied on the AMBA AHB, CAN, and
%%% ColdFire Master Bus.
@Article{Schirner:2008:QAS,
  author =       "Gunar Schirner and Rainer D{\"o}mer",
  title =        "Quantitative analysis of the speed\slash accuracy
                 trade-off in transaction level modeling",
  journal =      j-TECS,
  volume =       "8",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1457246.1457250",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 6 14:36:01 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The increasing complexity of embedded systems requires
                 modeling at higher levels of abstraction. Transaction
                 level modeling (TLM) has been proposed to abstract
                 communication for high-speed system simulation and
                 rapid design space exploration. Although being widely
                 accepted for its high performance and efficiency, TLM
                 often exhibits a significant loss in model
                 accuracy.\par

                 In this article, we systematically analyze and quantify
                 the speed/accuracy trade-off in TLM. To this end, we
                 provide a classification of TLM abstraction levels
                 based on model granularity and define appropriate
                 metrics and test setups to quantitatively measure and
                 compare the performance and accuracy of such
                 models.\par

                 Addressing several classes of embedded communication
                 protocols, we apply our analysis to three common bus
                 architectures, the industry-standard AMBA advanced
                 high-performance bus (AHB) as an on-chip parallel bus,
                 the controller area network (CAN) as an off-chip serial
                 bus, and the Motorola ColdFire Master Bus as an example
                 for a custom embedded processor bus.\par

                 Based on the analysis of these individual busses, we
                 then generalize our results for a broader conclusion.
                 The general TLM trade-off offers gains of up to four
                 orders of magnitude in simulation speed, generally
                 however, at the price of low accuracy. We conclude
                 further that model granularity is the key to efficient
                 TLM abstraction, and we identify conditions for
                 accuracy of abstract models. As a result, this article
                 provides general guidelines that allow the system
                 designer to navigate the TLM trade-off effectively and
                 choose the most suitable model for the given
                 application with fast and accurate results.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "System level design; system-on-chip; transaction level
                 modeling",
}

%%% TECS 8(1), December 2008, article 5: compiler-assisted virtual
%%% memory address translation using a direct translation table (DTT).
@Article{Zhou:2008:DAT,
  author =       "Xiangrong Zhou and Peter Petrov",
  title =        "Direct address translation for virtual memory in
                 energy-efficient embedded systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1457246.1457251",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 6 14:36:01 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a methodology for virtual memory
                 support in energy-efficient embedded systems. A
                 holistic approach is proposed, where the combined
                 efforts of compiler, operating system, and hardware
                 architecture achieve a significant system power
                 reductions. The application information extracted and
                 analyzed by the compiler is utilized dynamically by the
                 microarchitecture and the operating system to perform
                 energy-efficient and, for many memory references,
                 time-deterministic address translations. We demonstrate
                 that by using application information regarding virtual
                 memory layout, an efficient and conflict-free
                 translation process can be implemented through the
                 utilization of a small hardware direct translation
                 table (DTT) accessed in an application-specific manner.
                 The set of virtual pages is partitioned into groups,
                 such that for each group only a few of the least
                 significant bits are used as an index to obtain the
                 physical page number. We outline an efficient
                 compile-time algorithm for identifying these groups and
                 allocate their translation entries optimally into the
                 DTT. The introduced hardware is minimal in terms of
                 area, performance, and power overhead, while offering
                 the flexibility of software programmability. This is
                 achieved through a small set of registers and tables,
                 which are made software accessible. We have
                 quantitatively evaluated the proposed methodology on a
                 number of embedded applications, including voice,
                 image, and video processing.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "address translation; Low-power embedded systems;
                 virtual memory",
}

%%% TECS 8(1), December 2008, article 6: quasistatic shared libraries
%%% and execution-in-place (XIP) for MMU-less embedded systems.
@Article{Park:2008:QSL,
  author =       "Jiyong Park and Jaesoo Lee and Saehwa Kim and Seongsoo
                 Hong",
  title =        "Quasistatic shared libraries and {XIP} for memory
                 footprint reduction in {MMU}-less embedded systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1457246.1457252",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 6 14:36:01 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Despite a rapid decrease in the price of solid state
                 memory devices, system memory is still a very precious
                 resource in embedded systems. The use of shared
                 libraries and execution-in-place (XIP) is known to be
                 effective in significantly reducing memory usage.
                 Unfortunately, many resource-constrained embedded
                 systems lack an MMU, making it extremely difficult to
                 support these techniques. To address this problem, we
                 propose a novel shared library technique called a
                 quasi-static shared library and an XIP, both based on
                 our enhanced position independent code technique. In
                 our quasistatic shared libraries, global symbols are
                 bound to pseudoaddresses at linking time and actual
                 physical addresses are bound at loading time. Unlike
                 conventional shared libraries, they do not require
                 symbol tables that take up valuable memory space and,
                 therefore, allow for expedited address translation at
                 runtime. Our XIP technique is facilitated by our
                 enhanced position independent code where a data section
                 can be arbitrarily located. Both the shared library and
                 XIP techniques are made possible by emulating an MMU's
                 memory mapping feature with a data section base
                 register (DSBR) and a data section base table
                 (DSBT).\par

                 We have implemented these proposed techniques in a
                 commercial ADSL (Asymmetric Digital Subscriber Line)
                 home network gateway equipped with an MMU-less ARM7TDMI
                 processor core, 2MB flash memory, and 16MB RAM. We
                 measured its memory usage and evaluated its performance
                 overhead by conducting a series of experiments. These
                 experiments clearly demonstrate the effectiveness of
                 our techniques in reducing memory usage. The results
                 are impressive: 35\% reduction in flash memory usage
                 when using only the shared library and 30\% reduction
                 in RAM usage when using the shared library and XIP
                 together. These results were achieved with only a
                 negligible performance penalty of less than 4\%. Even
                 though these techniques were applied to uClinux-based
                 embedded systems, they can be used for any MMU-less
                 real-time operating system.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Embedded systems; memory footprint reduction;
                 MMU-less; quasi-static linking; shared library",
}

%%% TECS 8(1), December 2008, article 7: worst-case execution time
%%% analysis of instruction caches with prefetching.
@Article{Yan:2008:AWC,
  author =       "Jun Yan and Wei Zhang",
  title =        "Analyzing the worst-case execution time for
                 instruction caches with prefetching",
  journal =      j-TECS,
  volume =       "8",
  number =       "1",
  pages =        "7:1--7:??",
  month =        dec,
  year =         "2008",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1457246.1457253",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Jan 6 14:36:01 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Time predictability is one of the most important
                 design considerations for real-time systems. In this
                 article, we study the impact of instruction prefetching
                 on the worst-case performance of instruction caches. We
                 extend the static cache simulation technique to model
                 and compute the worst-case instruction cache
                 performance with prefetching. The evaluation results
                 show that instruction prefetching can benefit both the
                 average-case and worst-case performance; however, the
                 degree of the worst-case performance improvement due to
                 instruction prefetching is less than that of the
                 average-case performance. As a result, the time
                 variation of computing is increased by instruction
                 prefetching. Also, our experimental results indicate
                 that the prefetching distance can significantly impact
                 the worst-case performance of instruction caches with
                 instruction prefetching. Specifically, when the
                 prefetching distance is equal to the L1 miss penalty,
                 the worst-case execution time with instruction
                 prefetching is minimized.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "hard real-time; instruction caches; instruction
                 prefetching; Worst-case execution time analysis",
}

@Article{Aaraj:2008:ADH,
  author = {Najwa Aaraj and Anand Raghunathan and Niraj K. Jha},
  title = {Analysis and design of a hardware\slash software
    trusted platform module for embedded systems},
  journal = j-TECS,
  year = {2008},
  month = dec,
  volume = {8},
  number = {1},
  pages = {8:1--8:??},
  articleno = {8},
  DOI = {http://doi.acm.org/10.1145/1457246.1457254},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {Custom instructions; embedded systems; multiprocessor
    systems},
  abstract = {Trusted platforms have been proposed as a promising
    approach to enhance the security of general-purpose
    computing systems. However, for many
    resource-constrained embedded systems, the size and
    cost overheads of a separate Trusted Platform Module
    (TPM) chip are not acceptable. One alternative is to
    use a software-based TPM, which implements TPM
    functions using software that executes in a protected
    execution domain on the embedded processor itself.
    However, since many embedded systems have limited
    processing capabilities and are battery-powered, it is
    also important to ensure that the computational and
    energy requirements for SW-TPMs are acceptable.\par

    In this article, we perform an evaluation of the energy
    and execution time overheads for a SW-TPM
    implementation on a handheld appliance (Sharp Zaurus
    PDA). We characterize the execution time and energy
    required by each TPM command through actual
    measurements on the target platform. We observe that
    for most commands, overheads are primarily due to the
    use of 2,048-bit RSA operations that are performed
    within the SW-TPM. In order to alleviate SW-TPM
    overheads, we evaluate the use of Elliptic Curve
    Cryptography (ECC) as a replacement for the RSA
    algorithm specified in the Trusted Computing Group
    (TCG) standards. In addition, we also evaluate the
    overheads of using the SW-TPM in the context of various
    end applications, including trusted boot of the Linux
    operating system (OS), a secure VoIP client, and a
    secure Web browser. Furthermore, we analyze the
    computational workload involved in running SW-TPM
    commands using ECC. We then present a suite of hardware
    and software enhancements to accelerate these commands
    --- generic custom instructions and exploitation of
    parallel processing capabilities in multiprocessor
    systems-on-chip (SoCs). We report results of evaluating
    the proposed architectures on a commercial embedded
    processor (Xtensa from Tensilica). Through uniprocessor
    and multiprocessor optimizations, we could achieve
    speed-ups of up to 5.71X for individual TPM commands.},
  bibdate = {Tue Jan 6 14:36:01 MST 2009},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Suresh:2009:EEE,
  author = {Dinesh C. Suresh and Banit Agrawal and Jun Yang and
    Walid Najjar},
  title = {Energy-efficient encoding techniques for off-chip data
    buses},
  journal = j-TECS,
  year = {2009},
  month = jan,
  volume = {8},
  number = {2},
  pages = {9:1--9:??},
  articleno = {9},
  DOI = {http://doi.acm.org/10.1145/1457255.1457256},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {bus switching; encoding; internal capacitances;
    Low-power data buses},
  abstract = {Reducing the power consumption of computing devices
    has gained a lot of attention recently. Many research
    works have focused on reducing power consumption in the
    off-chip buses as they consume a significant amount of
    total power. Since the bus power consumption is
    proportional to the switching activity, reducing the
    bus switching is an effective way to reduce bus power.
    While numerous techniques exist for reducing bus power
    in address buses, only a handful of techniques have
    been proposed for data-bus power reduction, where
    frequent value encoding (FVE) is the best existing
    scheme to reduce the transition activity on the data
    buses.\par

    In this article, we propose improved frequent value
    data bus-encoding techniques aimed at reducing more
    switching activity and, hence, power consumption. We
    propose three new schemes and five new variations to
    exploit bit-wise temporal and spatial locality in the
    data-bus values. Our techniques just use one external
    control signal and capture bit-wise locality to
    efficiently encode data values. For all the embedded
    and SPEC applications we tested, the overall average
    switching reduction is 53\% over unencoded data and
    10\% more than the conventional FVE scheme.},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Kejariwal:2009:ELL,
  author = {Arun Kejariwal and Alexander V. Veidenbaum and
    Alexandru Nicolau and Milind Girkar and Xinmin Tian and
    Hideki Saito},
  title = {On the exploitation of loop-level parallelism in
    embedded applications},
  journal = j-TECS,
  year = {2009},
  month = jan,
  volume = {8},
  number = {2},
  pages = {10:1--10:??},
  articleno = {10},
  DOI = {http://doi.acm.org/10.1145/1457255.1457257},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {libraries; Multi-cores; multithreading; parallel
    loops; programming models; system-on-chip (Soc);
    thread-level speculation; vectorization},
  abstract = {Advances in the silicon technology have enabled
    increasing support for hardware parallelism in embedded
    processors. Vector units, multiple processors/cores,
    multithreading, special-purpose accelerators such as
    DSPs or cryptographic engines, or a combination of the
    above have appeared in a number of processors. They
    serve to address the increasing performance
    requirements of modern embedded applications. To what
    extent the available hardware parallelism can be
    exploited is directly dependent on the amount of
    parallelism inherent in the given application and the
    congruence between the granularity of hardware and
    application parallelism. This paper discusses how
    loop-level parallelism in embedded applications can be
    exploited in hardware and software. Specifically, it
    evaluates the efficacy of automatic loop
    parallelization and the performance potential of
    different types of parallelism, viz., true thread-level
    parallelism (TLP), speculative thread-level parallelism
    and vector parallelism, when executing loops.
    Additionally, it discusses the interaction between
    parallelization and vectorization. Applications from
    both the industry-standard EEMBC{\reg},$^1$ 1.1, EEMBC
    2.0 and the academic MiBench embedded benchmark suites
    are analyzed using the Intel{\reg}$^2$ C compiler. The
    results show the performance that can be achieved today
    on real hardware and using a production compiler,
    provide upper bounds on the performance potential of
    the different types of thread-level parallelism, and
    point out a number of issues that need to be addressed
    to improve performance. The latter include
    parallelization of libraries such as libc and design of
    parallel algorithms to allow maximal exploitation of
    parallelism. The results also point to the need for
    developing new benchmark suites more suitable to
    parallel compilation and execution.\par

    $^1$ Other names and brands may be claimed as the
    property of others.\par

    $^2$ Intel is a trademark of Intel Corporation or its
    subsidiaries in the United States and other
    countries.},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Hashemi:2009:TDS,
  author =       "Matin Hashemi and Soheil Ghiasi",
  title =        "Throughput-driven synthesis of embedded software for
                 pipelined execution on multicore architectures",
  journal =      j-TECS,
  volume =       "8",
  number =       "2",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1457255.1457258",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Feb 5 19:15:05 MST 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We present a methodology for pipelined software
                 synthesis of streaming applications. First, we develop
                 a versatile task assignment algorithm capable of
                 optimizing realistically-arbitrary cost functions for
                 two cores. The algorithm is exact (i.e., theoretically
                 optimal) contrary to existing heuristics. Second, our
                 approximation technique provides an adjustable knob to
                 trade solution quality with algorithm runtime and
                 memory. Third, we develop a recursive heuristic for
                 more cores. FPGA-based emulated experiments validate
                 our theoretical results. The exact algorithm yields
                 $ 1.7 \times $ throughput improvement. The
                 approximation method offers a range of tradeoff points
                 (e.g., $ 3 \times $ faster with $ 20 \times $ less
                 memory) while degrading the throughput only 1\% to
                 5\%.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Embedded software; graph partitioning; multi-core
                 hardware; streaming applications; task assignment",
}

@Article{Chattopadhyay:2009:PPA,
  author = {A. Chattopadhyay and H. Ishebabi and X. Chen and Z.
    Rakosi and K. Karuri and D. Kammler and R. Leupers and
    G. Ascheid and H. Meyr},
  title = {Pre- and postfabrication architecture exploration for
    partially reconfigurable {VLIW} processors},
  journal = j-TECS,
  year = {2009},
  month = jan,
  volume = {8},
  number = {2},
  pages = {12:1--12:??},
  articleno = {12},
  DOI = {http://doi.acm.org/10.1145/1457255.1457259},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {ASIP; coarse-grained FPGA; VLIW},
  abstract = {Modern application-specific instruction-set processors
    (ASIPs) face the daunting task of delivering high
    performance for a wide range of applications. For
    enhancing the performance, architectural features
    (e.g., pipelining, VLIW) are often employed in ASIPs,
    leading to high design complexity. Integrated ASIP
    design environments like template-based approaches and
    language-driven approaches provide an answer to this
    growing design complexity. At the same time, increasing
    hardware design costs have motivated the processor
    designers to introduce high flexibility in the
    processor. Flexibility, in its most effective form, can
    be introduced to the ASIP by coupling a reconfigurable
    unit to the base processor. Due to its obvious
    benefits, several reconfigurable ASIPs (rASIPs) have
    been designed for years. This design paradigm gained
    momentum with the advent of coarse-grained FPGAs, where
    the lack of domain-specific performance common in
    general-purpose FPGAs are largely overcome by choosing
    application-dependent basic functional units. These
    rASIP designs lack a generic flow from high-level
    specification, resulting into intuitive design
    decisions and hard-to-retarget processor design tools.
    Although partial, template-based approaches for rASIP
    design is existent, a clear design methodology
    especially for the prefabrication architecture
    exploration is not present. In order to address this
    issue, a high-level specification and design
    methodology for partially reconfigurable VLIW
    processors is proposed in this article. To show the
    benefit of this approach a commercial VLIW processor is
    used as the base architecture and two domains of
    applications are studied for potential performance
    gain.},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lin:2009:MAC,
  author = {Yi-Neng Lin and Ying-Dar Lin and Kuo-Kun Tseng and
    Yuan-Cheng Lai},
  title = {Modeling and analysis of core-centric network
    processors},
  journal = j-TECS,
  year = {2009},
  month = jan,
  volume = {8},
  number = {2},
  pages = {13:1--13:??},
  articleno = {13},
  DOI = {http://doi.acm.org/10.1145/1457255.1457260},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {core-centric; embedded system; modeling; Network
    processor; simulation},
  abstract = {Network processors can be categorized into two types,
    the coprocessors-centric model in which the data-plane
    is handled by coprocessors, and the core-centric model
    in which the core processes most of the data-plane
    packets yet offloading some tasks to coprocessors.
    While the former has been properly explored over
    various applications, researches regarding the latter
    remain limited. Based on the previous experience of
    prototyping the virtual private network (VPN) over the
    IXP425 network processor, this work aims to derive
    design implications for the core-centric model
    performing computational intensive applications. From
    system and IC vendors' perspectives, the
    continuous-time Markov chain and Petri net simulations
    are adopted to explore this architecture. Analytical
    results prove to be quite inline with those of the
    simulation and implementation. With subsequent
    investigation we find that appropriate process run
    lengths can improve the effective core utilization by
    2.26 times, and by offloading the throughput boosts 7.5
    times. The results also suggest single process
    programming since context switch overhead impacts
    considerably on the performance.},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhou:2009:CLC,
  author = {Xiangrong Zhou and Peter Petrov},
  title = {Cross-layer customization for rapid and low-cost task
    preemption in multitasked embedded systems},
  journal = j-TECS,
  year = {2009},
  month = jan,
  volume = {8},
  number = {2},
  pages = {14:1--14:??},
  articleno = {14},
  DOI = {http://doi.acm.org/10.1145/1457255.1457261},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract = {Preemptive multitasking is widely used in many
    low-cost and real-time embedded applications for its
    superior hardware utilization. The frequent and
    asynchronous context switches, however, require the
    preservation and restoration of the task state, thus
    resulting in a large number of memory transfer
    instructions. As a consequence, task responsiveness and
    application throughput can be significantly
    deteriorated. To address this problem we propose a
    cross-layer customization framework which through the
    close cooperation of compiler, OS, and hardware
    architecture achieves rapid and low-cost task switch.
    Application information extracted during compile-time
    regarding state liveness is exploited in order to
    preserve a minimal amount of task state on task
    preemption. We introduce two complementary techniques
    to implement the application-aware state preservation.
    The first technique utilizes compiler-generated custom
    routines which preserve/restore an extremely small live
    context at judiciously selected points in the
    application code. The second technique requires more
    sophisticated hardware support. It employs an
    OS-controlled register file mapping to achieve a rapid
    context switch. By mapping a small fraction of the
    register file in a single clock cycle, a context switch
    is achieved requiring no memory transfers for the
    majority of cases to preserve/restore the live state.
    The effect of aggressively replicated register files,
    where each task is given its own replica, is achieved
    with the hardware cost of only adding from 25\% to 50\%
    extra physical registers. Through the utilization of
    these novel mechanisms, a significant improvement on
    task response time is achieved as the context-switch
    cost is minimized.},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Auerbach:2009:LLT,
  author = {Joshua Auerbach and David F. Bacon and Daniel Iercan
    and Christoph M. Kirsch and V. T. Rajan and Harald
    R{\"o}ck and Rainer Trummer},
  title = {Low-latency time-portable real-time programming with
    {Exotasks}},
  journal = j-TECS,
  year = {2009},
  month = jan,
  volume = {8},
  number = {2},
  pages = {15:1--15:??},
  articleno = {15},
  DOI = {http://doi.acm.org/10.1145/1457255.1457262},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {Real-time scheduling; time portability; UAVs; virtual
    machine},
  abstract = {{\em Exotasks\/} are a novel Java programming
    construct that achieve three important goals. They
    achieve low latency while allowing the fullest use of
    Java language features, compared to previous attempts
    to restrict the Java language for use in the
    submillisecond domain. They support pluggable
    schedulers, allowing easy implementation of new
    scheduling paradigms in a real-time Java system. They
    can achieve deterministic timing, even in the presence
    of other Java threads, and across changes of hardware
    and software platform. To achieve these goals, the
    program is divided into tasks with private heaps. Tasks
    may be strongly isolated, communicating only with each
    other and guaranteeing determinism, or weakly isolated,
    allowing some communication with the rest of the Java
    application. Scheduling of the tasks' execution,
    garbage collection, and value passing is accomplished
    by the pluggable scheduler. Schedulers that we have
    written employ logical execution time (LET) in
    association with strong isolation to achieve time
    portability. We have also built a quad-rotor model
    helicopter, the JAviator, which we use to evaluate our
    implementation of Exotasks in an experimental embedded
    version of IBM's J9 real-time virtual machine. Our
    experiments show that we are able to maintain very low
    scheduling jitter and deterministic behavior in the
    face of variations in both software load and hardware
    platform. We also show that Exotasks perform nearly as
    well as Eventrons on a benchmark audio application.},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ahn:2009:RCT,
  author = {Minwook Ahn and Yunheung Paek},
  title = {Register coalescing techniques for heterogeneous
    register architecture with copy sifting},
  journal = j-TECS,
  year = {2009},
  month = jan,
  volume = {8},
  number = {2},
  pages = {16:1--16:??},
  articleno = {16},
  DOI = {http://doi.acm.org/10.1145/1457255.1457263},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {compiler; embedded processors; heterogeneous register
    architecture; Register allocation; register
    coalescing},
  abstract = {Optimistic coalescing has been proven as an elegant
    and effective technique that provides better chances of
    safely coloring more registers in register allocation
    than other coalescing techniques. Its algorithm
    originally assumes homogeneous registers, which are all
    gathered in the same register file. Although this
    register architecture is still common in most
    general-purpose processors, embedded processors often
    contain heterogeneous registers, which are scattered in
    physically different register files dedicated for each
    dissimilar purpose and use. In this work, we show that
    optimistic coalescing is also useful for an embedded
    processor to better handle such heterogeneity of the
    register architecture, and developed a modified
    algorithm for optimal coalescing that helps a register
    allocator. In the experiment, an existing register
    allocator was able to achieve up to 13.0\% reduction in
    code size through our coalescing, and avoid many spills
    that would have been generated without our scheme.},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Mozumdar:2009:CSP,
  author = {Mohammad Mostafizur Rahman Mozumdar and Luciano
    Lavagno and Laura Vanzago},
  title = {A comparison of software platforms for wireless sensor
    networks: {MANTIS}, {TinyOS}, and {ZigBee}},
  journal = j-TECS,
  year = {2009},
  month = jan,
  volume = {8},
  number = {2},
  pages = {17:1--17:??},
  articleno = {17},
  DOI = {http://doi.acm.org/10.1145/1457255.1457264},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  CODEN = {????},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {application porting; software platform; Wireless
    sensor networks},
  abstract = {Wireless sensor networks are characterized by very
    tight code size and power constraints and by a lack of
    well-established standard software development
    platforms such as Posix. In this article, we present a
    comparative study between a few fairly different such
    platforms, namely MANTIS, TinyOS, and ZigBee, when
    considering them from the application developer's
    perspective, that is, by focusing mostly on functional
    aspects, rather than on performance or code size. In
    other words, we compare both the tasking model used by
    these platforms and the API libraries they offer.
    Sensor network applications are basically event based,
    so most of the software platforms are also built on
    considering event handling mechanism, however some use
    a more traditional thread based model. In this article,
    we consider implementations of a simple generic
    application in MANTIS, TinyOS, and the Ember ZigBee
    development framework, with the goal of depicting major
    differences between these platforms, and suggesting a
    programming style aimed at maximizing portability
    between them.},
  bibdate = {Thu Feb 5 19:15:05 MST 2009},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): the article number 17 (pages 17:1) recorded below is
%%% also carried by entry Mozumdar:2009:CSP in volume 8, number 2.
%%% TODO: confirm the numbering against the publisher's record.
@Article{Unnikrishnan:2009:RMR,
  author =       "P. Unnikrishnan and G. Chen and M. Kandemir and M.
                 Karakoy and I. Kolcu",
  title =        "Reducing memory requirements of resource-constrained
                 applications",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "17:1--17:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1509288.1509289",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded computing platforms are often resource
                 constrained, requiring great design and implementation
                 attention to memory-, power-, and heat-related
                 parameters. An important task for a compiler in such
                 platforms is to simplify the process of developing
                 applications for limited memory devices and
                 resource-constrained clients. Focusing on
                 array-intensive embedded applications to be executed on
                 single CPU-based architectures, this work explores how
                 loop-based compiler optimizations can be used for
                 increasing memory location reuse. Our goal is to
                 transform a given application in such a way that the
                 resulting code has fewer cases (as compared to the
                 original code), where the lifetimes of array elements
                 overlap. The reduction in lifetimes of array elements
                 can then be exploited by reusing memory locations as
                 much as possible. Our experimental results indicate
                 that the proposed strategy reduces data space
                 requirements of 15 resource constrained applications by
                 more than 40\%, on average. We also demonstrate how
                 this strategy can be combined with data locality (cache
                 behavior)--enhancing techniques so that a compiler can
                 take advantage of both, that is, reduce data memory
                 requirements and improve data locality at the same
                 time.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "compilers; embedded system; lifetime; Memory; reuse",
}

@Article{Weng:2009:AMN,
  author =       "Ning Weng and Tilman Wolf",
  title =        "Analytic modeling of network processors for parallel
                 workload mapping",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "18:1--18:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1509288.1509290",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Network processors are heterogeneous system-on-chip
                 multiprocessors that are optimized to perform packet
                 forwarding and processing tasks at Gigabit data rates.
                 To meet the performance demands of increasing link
                 speeds and complex network applications, network
                 processors are implemented with several dozen embedded
                 processor cores and hardware accelerators that run
                 multiple packet processing applications in parallel.
                 The parallel nature of the processing system makes it
                 increasingly difficult for application developers to
                 understand and manage resources and map processing
                 tasks to the hardware. To address this problem, we
                 present a methodology for profiling and analyzing
                 network processor applications, mapping processing
                 tasks to a generalized network processor architecture,
                 and analytically determining the expected throughput
                 performance. The key novelty of this work is not only
                 the adaptation of application analysis and mapping
                 algorithms to heterogeneous network processors, but
                 also that the entire process can be automated and
                 hidden from the application developer. Starting with
                 the analysis of a uniprocessor implementation of the
                 application, the process yields a mapping of the
                 partitioned application that shows best performance for
                 a given network processor system. The simplicity of the
                 proposed randomized mapping algorithm allows the use of
                 this methodology in network processor runtime systems
                 where dynamic reallocation of tasks is necessary but
                 processing power is limited. We present results that
                 show the effectiveness of the analysis and mapping
                 methodology as well as its application to design space
                 exploration.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Application profiling; embedded systems;
                 multiprocessor scheduling; network processors",
}

@Article{Tseng:2009:FSA,
  author =       "Kuo-Kun Tseng and Yuan-Cheng Lai and Ying-Dar Lin and
                 Tsern-Huei Lee",
  title =        "A fast scalable automaton-matching accelerator for
                 embedded content processors",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "19:1--19:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1509288.1509291",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Home and office network gateways often employ a
                 cost-effective embedded network processor to handle
                 their network services. Such network gateways have
                 received strong demand for applications dealing with
                 intrusion detection, keyword blocking, antivirus and
                 antispam. Accordingly, we were motivated to propose an
                 appropriate fast scalable automaton-matching (FSAM)
                 hardware to accelerate the embedded network processors.
                 Although automaton matching algorithms are robust with
                 deterministic matching time, there is still plenty of
                 room for improving their average-case performance. FSAM
                 employs novel prehash and root-index techniques to
                 accelerate the matching for the nonroot states and the
                 root state, respectively, in automaton-based hardware.
                 The prehash approach uses some hashing functions to
                 pretest the input substring for the nonroot states
                 while the root-index approach handles multiple bytes in
                 one single matching for the root state. Also, FSAM is
                 applied in a prevalent automaton algorithm,
                 Aho--Corasick (AC), which is often used in many
                 content-filtering applications. When implemented in
                 FPGA, FSAM can perform at the rate of 11.1Gbps with the
                 pattern set of 32,634 bytes, demonstrating that our
                 proposed approach can use a small logic circuit to
                 achieve a competitive performance, although a larger
                 memory is used. Furthermore, the amount of patterns in
                 FSAM is not limited by the amount of internal circuits
                 and memories. If the high-speed external memories are
                 employed, FSAM can support up to 21,302 patterns while
                 maintaining similar high performance.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Aho--Corasick; automaton; Bloom filter; content
                 filtering; String matching",
}

@Article{Reshadi:2009:HCS,
  author =       "Mehrdad Reshadi and Prabhat Mishra and Nikil Dutt",
  title =        "Hybrid-compiled simulation: an efficient technique
                 for instruction-set architecture simulation",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "20:1--20:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1509288.1509292",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Instruction-set simulators are critical tools for the
                 exploration and validation of new processor
                 architectures. Due to the increasing complexity of
                 architectures and time-to-market pressure, performance
                 is the most important feature of an instruction-set
                 simulator. Interpretive simulators are flexible but
                 slow, whereas compiled simulators deliver speed at the
                 cost of flexibility and compilation overhead. This
                 article presents a hybrid instruction-set-compiled
                 simulation (HISCS) technique for generation of fast
                 instruction-set simulators that combines the benefit of
                 both compiled and interpretive simulation. This article
                 makes two important contributions: (i) it improves the
                 interpretive simulation performance by applying
                 compiled simulation at the instruction level using a
                 novel template-customization technique to generate
                 optimized decoded instructions during compile time; and
                 (ii) it reduces the compile-time overhead by combining
                 the benefits of both static and dynamic-compiled
                 simulation. Our experimental results using two
                 contemporary processors (ARM7 and SPARC) demonstrate an
                 order-of-magnitude reduction in compilation time as
                 well as a 70\% performance improvement, on average,
                 over the best-known published result in instruction-set
                 simulation.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Compiled simulation; instruction set architecture;
                 interpretive simulation; partial evaluation",
}

@Article{Nguyen:2009:MAE,
  author =       "Nghi Nguyen and Angel Dominguez and Rajeev Barua",
  title =        "Memory allocation for embedded systems with a
                 compile-time-unknown scratch-pad size",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "21:1--21:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1509288.1509293",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents the first memory allocation
                 scheme for embedded systems having a scratch-pad memory
                 whose size is unknown at compile time. A scratch-pad
                 memory (SPM) is a fast compiler-managed SRAM that
                 replaces the hardware-managed cache. All existing
                 memory allocation schemes for SPM require the SPM size
                 to be known at compile time. Unfortunately, because of
                 this constraint, the resulting executable is tied to
                 that size of SPM and is not portable to other processor
                 implementations having a different SPM size.
                 Size-portable code is valuable when programs are
                 downloaded during deployment either via a network or
                 portable media. Code downloads are used for fixing bugs
                 or for enhancing functionality. The presence of
                 different SPM sizes in different devices is common
                 because of the evolution in VLSI technology across
                 years. The result is that SPM cannot be used in such
                 situations with downloaded codes.\par

                 To overcome this limitation, our work presents a
                 compiler method whose resulting executable is portable
                 across SPMs of any size. Our technique is to employ a
                 customized installer software, which decides the SPM
                 allocation just before the program's first run, since
                 the SPM size can be discovered at that time. The
                 installer then, based on the decided allocation,
                 modifies the program executable accordingly. The
                 resulting executable places frequently used objects in
                 SPM, considering both code and data for placement. To
                 keep the overhead low, much of the preprocessing for
                 the allocation is done at compile time. Results show
                 that our benchmarks average a 41\% speedup versus an
                 all-DRAM allocation, while the optimal static
                 allocation scheme, which knows the SPM size at compile
                 time and is thus an unachievable upper-bound, is
                 only slightly faster (45\% faster than all-DRAM).
                 Results also show that the overhead from our customized
                 installer averages about 1.5\% in code size, 2\% in
                 runtime, and 3\% in compile time for our benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "compiler; data linked list; downloadable codes;
                 embedded loading; embedded systems; Memory allocation;
                 scratch-pad",
}

@Article{Lysecky:2009:DIM,
  author =       "Roman Lysecky and Frank Vahid",
  title =        "Design and implementation of a {MicroBlaze}-based warp
                 processor",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "22:1--22:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1509288.1509294",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "While soft processor cores provided by FPGA vendors
                 offer designers with increased flexibility, such
                 processors typically incur penalties in performance and
                 energy consumption compared to hard processor core
                 alternatives. The recently developed technology of warp
                 processing can help reduce those penalties. Warp
                 processing is the dynamic and transparent
                 transformation of critical software regions from
                 microprocessor execution to much faster circuit
                 execution on an FPGA. In this article, we describe an
                 implementation of a warp processor on a Xilinx
                 Virtex-II Pro and Spartan3 FPGAs incorporating one or
                 more MicroBlaze soft processor cores. We further
                 provide a detailed analysis of the energy overhead of
                 dynamically partitioning an application's kernels to
                 hardware executing within an FPGA. Considering an
                 implementation that periodically partitions the
                 executing application once every minute, a
                 MicroBlaze-based warp processor implemented on a
                 Spartan3 FPGA achieves average speedups of $5.8\times$
                 and energy reductions of 49\% compared to the
                 MicroBlaze soft processor core alone --- providing
                 competitive performance and energy consumption compared
                 to existing hard processor cores.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "configurable logic; dynamic optimization; FPGA;
                 hardware/software partitioning; just-in-time (JIT)
                 compilation; soft processor cores; Warp processors",
}

@Article{Bai:2009:MME,
  author =       "Lan S. Bai and Lei Yang and Robert P. Dick",
  title =        "{MEMMU}: {Memory} expansion for {MMU}-less embedded
                 systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "23:1--23:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1509288.1509295",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Random access memory (RAM) is tightly constrained in
                 the least expensive, lowest-power embedded systems such
                 as sensor network nodes and portable consumer
                 electronics. The most widely used sensor network nodes
                 have only 4 to 10KB of RAM and do not contain memory
                 management units (MMUs). It is difficult to implement
                 complex applications under such tight memory
                 constraints. Nonetheless, price and power-consumption
                 constraints make it unlikely that increases in RAM in
                 these systems will keep pace with the increasing memory
                 requirements of applications.\par

                 We propose the use of automated compile-time and
                 runtime techniques to increase the amount of usable
                 memory in MMU-less embedded systems. The proposed
                 techniques do not increase hardware cost, and require
                 few or no changes to existing applications. We have
                 developed runtime library routines and compiler
                 transformations to control and optimize the automatic
                 migration of application data between compressed and
                 uncompressed memory regions, as well as a fast
                 compression algorithm well suited to this application.
                 These techniques were experimentally evaluated on
                 Crossbow TelosB sensor network nodes running a number
                 of data-collection and signal-processing applications.
                 Our results indicate that available memory can be
                 increased by up to 50\% with less than 10\% performance
                 degradation for most benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Data compression; embedded system; wireless sensor
                 network",
}

@Article{Doblander:2009:NSF,
  author =       "Andreas Doblander and Andreas Zoufal and Bernhard
                 Rinner",
  title =        "A novel software framework for embedded multiprocessor
                 smart cameras",
  journal =      j-TECS,
  volume =       "8",
  number =       "3",
  pages =        "24:1--24:??",
  month =        apr,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1509288.1509296",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Apr 21 16:29:24 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Distributed smart cameras (DSC) are an emerging
                 technology for a broad range of important applications
                 including smart rooms, surveillance, entertainment,
                 tracking, and motion analysis. By having access to many
                 views and through cooperation among the individual
                 cameras, these DSCs have the potential to realize many
                 more complex and challenging applications than
                 single-camera systems.\par

                 This article focuses on the system-level software
                 required for efficient streaming applications on single
                 smart cameras as well as on networks of DSCs. Embedded
                 platforms with limited resources do not provide
                 middleware services well known on general-purpose
                 platforms. Our software framework supports transparent
                 intra- and interprocessor communication while keeping
                 the memory and computation overhead very low. The
                 software framework is based on a publisher--subscriber
                 architecture and provides mechanisms for dynamically
                 loading and unloading software components as well as
                 for graceful degradation in case of software- and
                 hardware-related faults. The software framework has
                 been completely implemented and tested on our embedded
                 smart cameras consisting of an ARM-based network
                 processor and several digital signal processors. Two
                 case studies demonstrate the feasibility of our
                 approach.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "distributed embedded systems; fault tolerance;
                 publisher--subscriber; Smart cameras; video
                 surveillance",
}

@Article{Li:2009:ELC,
  author =       "Zhiyuan Li and Santosh Pande",
  title =        "Editorial: {Languages}, compilers, and tools for
                 embedded systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "25:1--25:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1550987.1550988",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "25",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Murray:2009:CTI,
  author =       "Alastair C. Murray and Richard V. Bennett and
                 Bj{\"o}rn Franke and Nigel Topham",
  title =        "Code transformation and instruction set extension",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "26:1--26:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1550987.1550989",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The demand for flexible embedded solutions and short
                 time-to-market has led to the development of extensible
                 processors that allow for customization through
                 user-defined instruction set extensions (ISEs). These
                 are usually identified from plain C sources. In this
                 article, we propose a combined exploration of code
                 transformations and ISE identification. The resulting
                 performance of such a combination has been measured on
                 two benchmark suites. Our results demonstrate that
                 combined code transformations and ISEs can yield
                 average performance improvements of 49\%. This
                 outperforms ISEs when applied in isolation, and in
                 extreme cases yields a speed-up of 2.85.",
  acknowledgement = ack-nhfb,
  articleno =    "26",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "ASIPs; compilers; Customizable processors; design
                 space exploration; instruction set extension;
                 source-level transformations",
}

@Article{Hu:2009:CAS,
  author =       "Jie Hu and Feihui Li and Vijay Degalahal and Mahmut
                 Kandemir and N. Vijaykrishnan and Mary J. Irwin",
  title =        "Compiler-assisted soft error detection under
                 performance and energy constraints in embedded
                 systems",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "27:1--27:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1550987.1550990",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Soft errors induced by terrestrial radiation are
                 becoming a significant concern in architectures
                 designed in newer technologies. If left undetected,
                 these errors can result in catastrophic consequences or
                 costly maintenance problems in different embedded
                 applications. In this article, we focus on utilizing
                 the compiler's help in duplicating instructions for
                 error detection in VLIW datapaths. The instruction
                 duplication mechanism is further supported by a
                 hardware enhancement for efficient result verification,
                 which avoids the need of additional comparison
                 instructions. In the proposed approach, the compiler
                 determines the instruction schedule by balancing the
                 permissible performance degradation and the energy
                 constraint with the required degree of duplication. Our
                 experimental results show that our algorithms allow the
                 designer to perform trade-off analysis between
                 performance, reliability, and energy consumption.",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "compilers; Embedded systems; energy consumption;
                 instruction duplication; reliability; soft errors",
}

@Article{Jafari:2009:EPR,
  author =       "Roozbeh Jafari and Hassan Ghasemzadeh and Foad Dabiri
                 and Ani Nahapetian and Majid Sarrafzadeh",
  title =        "An efficient placement and routing technique for
                 fault-tolerant distributed embedded computing",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "28:1--28:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1550987.1550991",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents an efficient technique for
                 placement and routing of sensors/actuators and
                 processing units in a grid network. The driver
                 application that we present is a medical jacket, which
                 requires an extremely high level of robustness and
                 fault tolerance. The power consumption of such jacket
                 is another key technological constraint. Our proposed
                 interconnection network is a mesh of wires. A jacket
                 made of fabric and wires would be susceptible to
                 accidental damage via tears. By modeling the tears, we
                 evaluate the probability of having failures on every
                 segment of wires in our mesh interconnection network.
                 Then, we study two problems of placement and routing in
                 the sensor networks such that the fault tolerance is
                 maximized while the power consumption is minimized. We
                 develop efficient integer linear programming (ILP)
                 formulations to address these problems and perform both
                 placement and routing, simultaneously. This ensures
                 that the solution is a lower bound for both problems.
                 We evaluate the effectiveness of our proposed
                 techniques on a variety of benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Distributed embedded system; fault tolerance;
                 placement; routing; sensor networks",
}

@Article{Lee:2009:CIA,
  author =       "Edward A. Lee and Xiaojun Liu and Stephen
                 Neuendorffer",
  title =        "Classes and inheritance in actor-oriented design",
  journal =      j-TECS,
  volume =       "8",
  number =       "4",
  pages =        "29:1--29:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1550987.1550992",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jul 23 12:32:49 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Actor-oriented components emphasize concurrency and
                 temporal semantics and are used for modeling and
                 designing embedded software and hardware. Actors
                 interact with one another through ports via a messaging
                 schema that can follow any of several concurrent
                 semantics. Domain-specific actor-oriented languages and
                 frameworks are common (Simulink, LabVIEW, SystemC,
                 etc.). However, they lack many modularity and
                 abstraction mechanisms that programmers have become
                 accustomed to in object-oriented components, such as
                 classes, inheritance, interfaces, and polymorphism,
                 except as inherited from the host language. This
                 article shows a form that such mechanisms can take in
                 actor-oriented components, gives a formal structure,
                 and describes a prototype implementation. The
                 mechanisms support actor-oriented class definitions,
                 subclassing, inheritance, and overriding. The formal
                 structure imposes structural constraints on a model
                 (mainly the ``derivation invariant'') that lead to a
                 policy to govern inheritance. In particular, the
                 structural constraints permit a disciplined form of
                 multiple inheritance with unambiguous inheritance and
                 overriding behavior. The policy is based formally on a
                 generalized ultrametric space with some remarkable
                 properties. In this space, inheritance is favored when
                 actors are ``closer'' (in the generalized ultrametric),
                 and we show that when inheritance can occur from
                 multiple sources, one source is always unambiguously
                 closer than the other.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Actors; components; generalized ultrametric;
                 inheritance; interfaces; overriding; type systems",
}

@Article{Riccobene:2009:SCB,
  author          = {Elvinia Riccobene and Patrizia Scandurra and Sara
                     Bocchio and Alberto Rosti and Luigi Lavazza and Luigi
                     Mantellini},
  title           = {{SystemC\slash C-based} model-driven design for
                     embedded systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = jul,
  volume          = {8},
  number          = {4},
  articleno       = {30},
  pages           = {30:1--30:??},
  DOI             = {http://doi.acm.org/10.1145/1550987.1550993},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {C; ES; MDE; SoC; SystemC; UML},
  abstract        = {This article summarizes our effort, since 2004 up to
                     the present time, for improving the current industrial
                     Systems-on-Chip and Embedded Systems design by joining
                     the capabilities of the unified modeling language (UML)
                     and SystemC/C programming languages to operate at
                     system-level. The proposed approach exploits the OMG
                     model-driven architecture --- a framework for
                     Model-driven Engineering --- capabilities of reducing
                     abstract, coarse-grained and platform-independent
                     system models to fine-grained and platform-specific
                     models. We first defined a design methodology and a
                     development flow for the hardware, based on a SystemC
                     UML profile and encompassing different levels of
                     abstraction. We then included a multithread C UML
                     profile for modelling software applications. Both
                     SystemC/C profiles are consistent sets of modelling
                     constructs designed to lift the programming features
                     (both structural and behavioral) of the two coding
                     languages to the UML modeling level. The new codesign
                     flow is supported by an environment, which allows
                     system modeling at higher abstraction levels (from a
                     functional executable level to a register transfer
                     level) and supports automatic
                     code-generation/back-annotation from/to UML models.},
  bibdate         = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Bini:2009:MCE,
  author          = {Enrico Bini and Giorgio Buttazzo and Giuseppe
                     Lipari},
  title           = {Minimizing {CPU} energy in real-time systems with
                     discrete speed management},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = jul,
  volume          = {8},
  number          = {4},
  articleno       = {31},
  pages           = {31:1--31:??},
  DOI             = {http://doi.acm.org/10.1145/1550987.1550994},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {CPU energy; Real-time systems},
  abstract        = {This article presents a general framework to analyze
                     and design embedded systems minimizing the energy
                     consumption without violating timing requirements. A
                     set of realistic assumptions is considered in the model
                     in order to apply the results in practical real-time
                     applications. The processor is assumed to have as a set
                     of discrete operating modes, each characterized by
                     speed and power consumption. The energy overhead and
                     the transition delay incurred during mode switches are
                     considered. Task computation times are modeled with a
                     part that scales with the speed and a part having a
                     fixed duration, to take I/O operations into
                     account.\par

                     The proposed method allows to compute the optimal
                     sequence of voltage/speed changes that approximates the
                     minimum continuous speed, which guarantees the
                     feasibility of a given set of real-time tasks, without
                     violating the deadline constraints. The analysis is
                     performed both under fixed and dynamic priority
                     assignments.},
  bibdate         = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Koo:2009:FTG,
  author          = {Heon-Mo Koo and Prabhat Mishra},
  title           = {Functional test generation using design and property
                     decomposition techniques},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = jul,
  volume          = {8},
  number          = {4},
  articleno       = {32},
  pages           = {32:1--32:??},
  DOI             = {http://doi.acm.org/10.1145/1550987.1550995},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {design decomposition; functional validation; Model
                     checking; pipelined processor; property decomposition;
                     test generation},
  abstract        = {Functional verification of microprocessors is one of
                     the most complex and expensive tasks in the current
                     system-on-chip design methodology. Simulation using
                     functional test vectors is the most widely used form of
                     processor validation. A significant bottleneck in the
                     validation of such systems is the lack of automated
                     techniques for directed test generation. While existing
                     model checking--based approaches have proposed several
                     promising ideas for automated test generation, many
                     challenges remain in applying them to industrial
                     microprocessors. The time and resources required for
                     test generation using existing model checking--based
                     techniques can be prohibitively large. This article
                     presents an efficient test generation technique using
                     decompositional model checking. The contribution of the
                     article is the development of both property and design
                     decomposition procedures for efficient test generation
                     of pipelined processors. Our experimental results using
                     a multi-issue MIPS processor and an industrial
                     processor based on Power Architecture\TM{} Technology
                     demonstrate several orders-of-magnitude reduction in
                     validation effort by drastically reducing both test
                     generation time and test program length.},
  bibdate         = {Thu Jul 23 12:32:49 MDT 2009},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Plaks:2009:GECa,
  author =       "Toomas P. Plaks and Neil Bergmann and Bernard
                 Pottier",
  title =        "Guest editorial {CAPA'08 Configurable} computing:
                 {Configuring} algorithms, processes, and architecture
                 {Issue I}: {Configuring} algorithms and processes",
  journal =      j-TECS,
  volume =       "9",
  number =       "1",
  pages =        "1:1--1:??",
  month =        oct,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:40:57 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Ferri:2009:RIF,
  author          = {B. H. Ferri and A. A. Ferri},
  title           = {Reconfiguration of {IIR} filters in response to
                     computer resource availability},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:40:57 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Wang:2009:TTD,
  author          = {Xiaojun Wang and Miriam Leeser},
  title           = {A truly two-dimensional systolic array {FPGA}
                     implementation of {QR} decomposition},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:40:57 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{DoCarmoLucas:2009:ADF,
  author          = {Amilcar {Do Carmo Lucas} and Henning Sahlbach and Sean
                     Whitty and Sven Heithecker and Rolf Ernst},
  title           = {Application development with the {FlexWAFE} real-time
                     stream processing architecture for {FPGAs}},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {1},
  articleno       = {4},
  pages           = {4:1--4:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:40:57 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Nahapetian:2009:AAS,
  author          = {Ani Nahapetian and Philip Brisk and Soheil Ghiasi and
                     Majid Sarrafzadeh},
  title           = {An approximation algorithm for scheduling on
                     heterogeneous reconfigurable resources},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {1},
  articleno       = {5},
  pages           = {5:1--5:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:40:57 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Patterson:2009:SMB,
  author          = {C. Patterson and P. Athanas and M. Shelburne and J.
                     Bowen and J. Sur{\'\i}s and T. Dunham and J. Rice},
  title           = {Slotless module-based reconfiguration of embedded
                     {FPGAs}},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {1},
  articleno       = {6},
  pages           = {6:1--6:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:40:57 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lloyd:2009:PSN,
  author          = {Scott Lloyd and Quinn Snell},
  title           = {A packet-switched network architecture for
                     reconfigurable computing},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {1},
  articleno       = {7},
  pages           = {7:1--7:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:40:57 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lubbers:2009:RMP,
  author          = {Enno L{\"u}bbers and Marco Platzner},
  title           = {{ReconOS}: {Multithreaded} programming for
                     reconfigurable computers},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {1},
  articleno       = {8},
  pages           = {8:1--8:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:40:57 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Huang:2009:SFB,
  author          = {Jian Huang and Matthew Parris and Jooheung Lee and
                     Ronald F. Demara},
  title           = {Scalable {FPGA}-based architecture for {DCT}
                     computation using dynamic partial reconfiguration},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {1},
  articleno       = {9},
  pages           = {9:1--9:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:40:57 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Plaks:2009:GECb,
  author          = {Toomas P. Plaks and Neil Bergmann and Bernard
                     Pottier},
  title           = {Guest editorial {CAPA'08 Configurable} computing:
                     {Configuring} algorithms, processes, and architecture
                     {Issue II}: {Configuring} hardware architecture},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {2},
  articleno       = {10},
  pages           = {10:1--10:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:00 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): in "First Last" form BibTeX takes the final token as the
%%% surname, so "Ramesh Reddy C." is parsed with surname "C."; confirm the
%%% intended name order against the published article before changing it.
@Article{Alle:2009:RRR,
  author          = {Mythri Alle and Keshavan Varadarajan and Alexander
                     Fell and Ramesh Reddy C. and Nimmy Joseph and Saptarsi
                     Das and Prasenjit Biswas and Jugantor Chetia and Adarsh
                     Rao and S. K. Nandy and Ranjani Narayan},
  title           = {{REDEFINE}: {Runtime} reconfigurable polymorphic
                     {ASIC}},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {2},
  articleno       = {11},
  pages           = {11:1--11:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:00 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Banerjee:2009:FPU,
  author          = {Pritha Banerjee and Susmita Sur-Kolay and Arijit
                     Bishnu and Sandip Das and Subhas C. Nandy and Subhasis
                     Bhattacharjee},
  title           = {{FPGA} placement using space-filling curves: {Theory}
                     meets practice},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {2},
  articleno       = {12},
  pages           = {12:1--12:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:00 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Beckett:2009:PSM,
  author          = {Paul Beckett},
  title           = {Power scalability in a mesh-connected reconfigurable
                     architecture},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {2},
  articleno       = {13},
  pages           = {13:1--13:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:00 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhao:2009:STT,
  author          = {Weisheng Zhao and Eric Belhaire and Claude Chappert
                     and Pascale Mazoyer},
  title           = {Spin transfer torque {(STT)-MRAM--based} runtime
                     reconfiguration {FPGA} circuit},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2009},
  month           = oct,
  volume          = {9},
  number          = {2},
  articleno       = {14},
  pages           = {14:1--14:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:00 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lee:2010:CPV,
  author          = {Hyung Sun Lee and Byung Kook Kim},
  title           = {Coscheduling of processor voltage and control task
                     period for energy-efficient control systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {15},
  pages           = {15:1--15:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Reddy:2010:CPE,
  author          = {Rakesh Reddy and Peter Petrov},
  title           = {Cache partitioning for energy-efficient and
                     interference-free embedded multitasking},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {16},
  pages           = {16:1--16:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Geelen:2010:MES,
  author          = {Bert Geelen and Vissarion Ferentinos and Francky
                     Catthoor and Gauthier Lafruit and Diederik Verkest and
                     Rudy Lauwereins and Thanos Stouraitis},
  title           = {Modeling and exploiting spatial locality trade-offs in
                     wavelet-based applications under varying resource
                     requirements},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {17},
  pages           = {17:1--17:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Bueno:2010:ORA,
  author =       "David Bueno and Chris Conger and Alan D. George",
  title =        "Optimizing {RapidIO} architectures for onboard
                 processing",
  journal =      j-TECS,
  volume =       "9",
  number =       "3",
  pages =        "18:1--18:??",
  month =        feb,
  year =         "2010",
  CODEN =        "????",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Mar 15 18:41:02 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Inoue:2010:RSC,
  author          = {Hiroaki Inoue and Junji Sakai and Masato Edahiro},
  title           = {A robust seamless communication architecture for
                     next-generation mobile terminals on multi-{CPU}
                     {SoCs}},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {19},
  pages           = {19:1--19:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Manzanares:2010:CER,
  author          = {Adam Manzanares and Xiaojun Ruan and Shu Yin and Xiao
                     Qin and Adam Roth and Mais Najim},
  title           = {Conserving energy in real-time storage systems with
                     {I/O} burstiness},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {20},
  pages           = {20:1--20:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Courbot:2010:EBD,
  author          = {Alexandre Courbot and Gilles Grimaud and Jean-Jacques
                     Vandewalle},
  title           = {Efficient off-board deployment and customization of
                     virtual machine-based embedded systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {21},
  pages           = {21:1--21:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Xue:2010:IRP,
  author          = {Chun Jason Xue and Jingtong Hu and Zili Shao and Edwin
                     Sha},
  title           = {Iterational retiming with partitioning: {Loop}
                     scheduling with complete memory latency hiding},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {22},
  pages           = {22:1--22:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Cho:2010:LFS,
  author          = {Hyeonjoong Cho and Binoy Ravindran and E. Douglas
                     Jensen},
  title           = {Lock-free synchronization for dynamic embedded
                     real-time systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {23},
  pages           = {23:1--23:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Musoll:2010:CEL,
  author          = {Enric Musoll},
  title           = {A cost-effective load-balancing policy for tile-based,
                     massive multi-core packet processors},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {24},
  pages           = {24:1--24:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Guang:2010:HAM,
  author          = {Liang Guang and Ethiopia Nigussie and Pekka Rantala
                     and Jouni Isoaho and Hannu Tenhunen},
  title           = {Hierarchical agent monitoring design approach towards
                     self-aware parallel systems-on-chip},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {25},
  pages           = {25:1--25:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{McLoughlin:2010:RTR,
  author          = {Ian Vince McLoughlin and Timo Rolf Bretschneider},
  title           = {Reliability through redundant parallelism for
                     micro-satellite computing},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {26},
  pages           = {26:1--26:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Yang:2010:OMC,
  author          = {Lei Yang and Robert P. Dick and Haris Lekatsas and
                     Srimat Chakradhar},
  title           = {Online memory compression for embedded systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {27},
  pages           = {27:1--27:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Cesana:2010:MBM,
  author          = {Ulpian Cesana and Zhen He},
  title           = {Multi-buffer manager: {Energy-efficient} buffer
                     manager for databases on flash memory},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {28},
  pages           = {28:1--28:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Tichy:2010:GAF,
  author          = {Milan Tichy and Jan Schier and David Gregg},
  title           = {{GSFAP} adaptive filtering using log arithmetic for
                     resource-constrained embedded systems},
  journal         = j-TECS,
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  year            = {2010},
  month           = feb,
  volume          = {9},
  number          = {3},
  articleno       = {29},
  pages           = {29:1--29:??},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  CODEN           = {????},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  bibdate         = {Mon Mar 15 18:41:02 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Yang:2010:HPO,
  author          = {Yang, Lei and Dick, Robert P. and Lekatsas, Haris and
                     Chakradhar, Srimat},
  title           = {High-performance operating system controlled online
                     memory compression},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {30:1--30:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721696},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Online memory compression is a technology that
                     increases the amount of memory available to
                     applications by dynamically compressing and
                     decompressing their working datasets on demand. It has
                     proven extremely useful in embedded systems with tight
                     physical RAM constraints. The technology can be used to
                     increase functionality, reduce size, and reduce cost,
                     without modifying applications or hardware. This
                     article presents a new software-based online memory
                     compression algorithm for embedded systems. In
                     comparison with the best algorithms used in online
                     memory compression, our new algorithm has a competitive
                     compression ratio but is twice as fast. In addition, we
                     describe several practical problems encountered in
                     developing an online memory compression infrastructure
                     and present solutions. We present a method of
                     adaptively managing the uncompressed and compressed
                     memory regions during application execution. This
                     memory management scheme adapts to the predicted memory
                     requirements of applications. It permits efficient
                     compression for a wide range of applications. We have
                     evaluated our techniques on a portable embedded device
                     and have found that the memory available to
                     applications can be increased by $ 2.5 \times $ with
                     negligible performance and power consumption penalties,
                     and with no changes to hardware or applications. Our
                     techniques allow existing applications to execute with
                     less physical memory. They also allow applications with
                     larger working datasets to execute on unchanged
                     embedded system hardware, thereby increasing
                     functionality.},
  acknowledgement = ack-nhfb,
  articleno       = {30},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {compression; Embedded system; memory},
}

@article{Wu:2010:SAF,
  author          = {Wu, Chin-Hsien},
  title           = {A self-adjusting flash translation layer for
                     resource-limited embedded systems},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {31:1--31:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721697},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The capacity of flash memory storage systems has been
                     growing at a speed similar to many other storage
                     systems. In order to properly manage the product cost,
                     vendors face serious challenges in resource-limited
                     embedded systems. In this article, a self-adjusting
                     flash translation layer is proposed with low memory
                     requirements. The objective of the design is to provide
                     efficient address mapping and low garbage collection
                     overhead, while controlling main memory usage of the
                     flash translation layer. The capability of the design
                     is evaluated over realistic workloads and benchmarks.
                     System performance is also guaranteed under low memory
                     requirements.},
  acknowledgement = ack-nhfb,
  articleno       = {31},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {embedded systems; Flash memory; flash translation
                     layer; storage systems},
}

@article{Irturk:2010:GAG,
  author          = {Irturk, Ali and Benson, Bridget and Mirzaei, Shahnam and
                     Kastner, Ryan},
  title           = {{GUSTO}: an automatic generation and optimization
                     tool for matrix inversion architectures},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {32:1--32:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721698},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Matrix inversion is a common function found in many
                     algorithms used in wireless communication systems. As
                     FPGAs become an increasingly attractive platform for
                     wireless communication, it is important to understand
                     the trade-offs in designing a matrix inversion core on
                     an FPGA. This article describes a matrix inversion core
                     generator tool, GUSTO, that we developed to ease the
                     design space exploration across different matrix
                     inversion architectures. GUSTO is the first tool of its
                     kind to provide automatic generation of a variety of
                     general-purpose matrix inversion architectures with
                     different parameterization options. GUSTO also provides
                     an optimized application-specific architecture with an
                     average of 59\% area decrease and 3X throughput
                     increase over its general-purpose architecture. The
                     optimized architectures generated by GUSTO provide
                     comparable results to published matrix inversion
                     architecture implementations, but offer the advantage
                     of providing the designer the ability to study the
                     trade-offs between architectures with different design
                     parameters.},
  acknowledgement = ack-nhfb,
  articleno       = {32},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {design space exploration; Field programmable gate
                     arrays (FPGAs); matrix inversion},
}

@article{Yu:2010:FSB,
  author          = {Yu, Yue and Ren, Shangping and Frieder, Ophir},
  title           = {Feasibility of semiring-based timing constraints},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {33:1--33:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721699},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Real-time and embedded applications often involve
                     different types of timing constraints, such as
                     precedence constraints and real-time constraints. As
                     real-time and embedded applications further advance,
                     new timing constraint types are emerging as well.
                     Recent research on interval-based timing constraints is
                     an example. Thus, it is important to have a uniformed
                     timing constraint representation so that a generalized
                     approach can be developed to analyze the variant
                     constraint types.\par

                     A semiring-based timing constraint model is introduced
                     to generalize the representations of different
                     constraint types. Under this model, we develop an
                     algorithm to check the satisfaction feasibility for a
                     given set of semiring-based timing constraints. This
                     algorithm provides better performance in the average
                     case as compared to applying the Bellman-Ford algorithm
                     directly on the constraint set.\par

                     In addition, for a set of feasible semiring-based
                     timing constraints, event occurrence points that
                     satisfy the constraint set form a (hyperdimension)
                     feasible region. For the given two sets of timing
                     constraints, we develop a necessary and sufficient
                     condition to testify whether the two constraint sets'
                     feasible regions have an inclusion relation. If one
                     feasible region is included in the other, we know that
                     the real-time event occurrences that satisfy the
                     included constraint set will necessarily satisfy the
                     including set.},
  acknowledgement = ack-nhfb,
  articleno       = {33},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {timing constraint feasibility analysis; Timing
                     constraints},
}

@article{Tahaee:2010:PAP,
  author          = {Tahaee, Seyed-Abdoreza and Jahangir, Amir Hossein},
  title           = {A polynomial algorithm for partitioning problems},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {34:1--34:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721700},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article takes a theoretical approach to focus on
                     the algorithmic properties of hardware/software
                     partitioning. It proposes a method with polynomial
                     complexity to find the global optimum of an NP-hard
                     model partitioning problem for 75\% of occurrences
                     under some practical conditions. The global optimum is
                     approached with a lower bound distance for the
                     remaining 25\%. Furthermore, this approach ensures
                     finding the 2-approximate of the global optimum
                     partition in 97\% of instances where technical
                     assumptions exist. The strategy is based on
                     intelligently changing the parameters of the polynomial
                     model of the partitioning problem to force it to
                     produce (or approach) the exact solution to the NP-hard
                     model.},
  acknowledgement = ack-nhfb,
  articleno       = {34},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {hardware/software codesign; maximum flow minimum cut
                     problem; NP-hard problems; Partitioning problem},
}

@article{Peng:2010:OWZ,
  author          = {Peng, Huan-Kai and Lin, Youn-Long},
  title           = {An optimal warning-zone-length assignment algorithm
                     for real-time and multiple-{QoS} on-chip bus
                     arbitration},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {35:1--35:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721701},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In an advanced System-on-Chip (SoC) for real-time
                     applications, the arbiter of its on-chip communication
                     subsystem needs to support multiple QoS criteria while
                     providing a hard real-time guarantee. To fulfill both
                     objectives, the arbitration algorithm must dynamically
                     switch between NonReal-Time (NRT) and Real-Time (RT)
                     modes such that use of the RT mode is minimized to best
                     accommodate the overall QoS criteria. In this article,
                     we define a model for this problem, and propose optimal
                     solutions to its associated problems with static and
                     dynamic warning-zone-length assignment. Compared with
                     previous works, the proposed approach enables a bus
                     arbiter to use much less RT mode in providing a
                     Real-Time (RT) guarantee and, therefore, gives the
                     arbiter more opportunity to employ non-RT modes to
                     achieve better overall QoS. Experimental results show
                     that the proposed approach reduces RT mode usage by as
                     much as 37.1\%. Moreover, that reduction in RT mode
                     usage helps cut the execution time by 27.0\% when
                     applying our approach to an industrial DRAM controller.
                     Another case study on an AMBA-compliant
                     ultra-high-resolution H.264 decoder IP shows that the
                     proposed approach reduces RT mode usage by 26.4\%,
                     which leads to an average reduction of 10.4\% in
                     decoding time. Finally, when implementing a 16 master
                     arbiter, it costs only 6.9K and 9.5K gates of overhead
                     using the proposed static and dynamic approach,
                     respectively. Therefore, the proposed approach is
                     suitable for real-time SoC applications.},
  acknowledgement = ack-nhfb,
  articleno       = {35},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {on-chip communication; QoS; real-time scheduling;
                     System-on-Chip},
}

@article{Schlich:2010:MCS,
  author          = {Schlich, Bastian},
  title           = {Model checking of software for microcontrollers},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {36:1--36:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721702},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The interest of industries in model checking software
                     for microcontrollers is increasing. However, there are
                     currently no appropriate tools that can be applied by
                     embedded systems developers for the direct verification
                     of software for microcontrollers without the need for
                     manual modeling. This article describes a new approach
                     to model checking software for microcontrollers, which
                     verifies the assembly code of the software. The state
                     space is built using a tailored simulator, which
                     abstracts from time, handles nondeterminism, and
                     creates an overapproximation of the behavior shown by
                     the real microcontroller. Within this simulator, we
                     apply abstraction techniques to tackle the
                     state-explosion problem. In our approach, we combine
                     different formal methods, namely, model checking,
                     static analysis, and abstract interpretation. We also
                     combine explicit and symbolic model checking
                     techniques. This article presents a case study using
                     several programs to demonstrate the efficiency of the
                     applied abstraction techniques and to show the
                     applicability of this approach.},
  acknowledgement = ack-nhfb,
  articleno       = {36},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {Assembly code; formal verification; microcontroller;
                     model checking; static analysis},
}

@article{Bombieri:2010:SND,
  author          = {Bombieri, Nicola and Fummi, Franco and Quaglia, Davide},
  title           = {System\slash network design-space exploration based on
                     {TLM} for networked embedded systems},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {37:1--37:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721703},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article presents a methodology for the design of
                     Networked Embedded Systems (NESs), which extends
                     Transaction Level Modeling (TLM) to perform
                     system/network design-space exploration. As a result, a
                     new design dimension is added to the traditional TLM
                     refinement process to represent network configuration
                     alternatives. Each network configuration can be used to
                     drive both architecture exploration and system
                     validation after each refinement step. A system/network
                     simulation taxonomy is investigated aiming at precisely
                     identifying the role of cosimulation in system/network
                     design-space exploration. Furthermore, a general
                     criterion to map functionalities to system and network
                     models is presented. As a case study, the proposed
                     methodology is applied to the design of a Voice-over-IP
                     client.},
  acknowledgement = ack-nhfb,
  articleno       = {37},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {networked embedded systems; Transaction level
                     modeling},
}

@article{Lin:2010:SSA,
  author          = {Lin, Chang Hong and Wolf, Marilyn and Koutsoukos, Xenofon
                     and Neema, Sandeep and Sztipanovits, Janos},
  title           = {System and software architectures of distributed smart
                     cameras},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {38:1--38:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721704},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we describe a distributed,
                     peer-to-peer gesture recognition system along with a
                     software architecture modeling technique and authority
                     control protocol for ubiquitous cameras. This system
                     performs gesture recognition in real time by combining
                     imagery from multiple cameras without using a central
                     server. We propose a system architecture that uses a
                     network of inexpensive cameras to perform in-network
                     video processing. A methodology for transforming
                     well-designed single-node algorithm to distributed
                     system is also proposed. Applications for ubiquitous
                     cameras can be modeled as the composition of a
                     finite-state machine of the system, functional
                     services, and middleware. A service-oriented software
                     architecture is proposed to dynamically reconfigure
                     services when system state changes. By exchanging data
                     and control messages between neighboring sensors, each
                     node can maintain broader view of the environment with
                     integrated video-processing results. Our prototype
                     system is built on Windows machines, and uses standard
                     video cameras as sensors and local network as a
                     communication channel.},
  acknowledgement = ack-nhfb,
  articleno       = {38},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {Distributed cameras; smart camera; software
                     architecture},
}

@article{Zhou:2010:MMS,
  author          = {Zhou, Gang and Wu, Yafeng and Yan, Ting and He, Tian and
                     Huang, Chengdu and Stankovic, John A. and
                     Abdelzaher, Tarek F.},
  title           = {A multifrequency {MAC} specially designed for wireless
                     sensor network applications},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {39:1--39:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721705},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Multifrequency media access control has been well
                     understood in general wireless ad hoc networks, while
                     in wireless sensor networks, researchers still focus on
                     single frequency solutions. In wireless sensor
                     networks, each device is typically equipped with a
                     single radio transceiver and applications adopt much
                     smaller packet sizes compared to those in general
                     wireless ad hoc networks. Hence, the multifrequency MAC
                     protocols proposed for general wireless ad hoc networks
                     are not suitable for wireless sensor network
                     applications, which we further demonstrate through our
                     simulation experiments. In this article, we propose
                     MMSN, which takes advantage of multifrequency
                     availability while, at the same time, takes into
                     consideration the restrictions of wireless sensor
                     networks. In MMSN, four frequency assignment options
                     are provided to meet different application
                     requirements. A scalable media access is designed with
                     efficient broadcast support. Also, an optimal
                     nonuniform back-off algorithm is derived and its
                     lightweight approximation is implemented in MMSN, which
                     significantly reduces congestion in the time
                     synchronized media access design. Through extensive
                     experiments, MMSN exhibits the prominent ability to
                     utilize parallel transmissions among neighboring nodes.
                     When multiple physical frequencies are available, it
                     also achieves increased energy efficiency,
                     demonstrating the ability to work against radio
                     interference and the tolerance to a wide range of
                     measured time synchronization errors.},
  acknowledgement = ack-nhfb,
  articleno       = {39},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {media access control; multi-channel; radio
                     interference; time synchronization; Wireless sensor
                     networks},
}

@article{Jung:2010:SFS,
  author          = {Jung, Dawoon and Kang, Jeong-Uk and Jo, Heeseung and
                     Kim, Jin-Soo and Lee, Joonwon},
  title           = {Superblock {FTL}: a superblock-based {Flash
                     Translation Layer} with a hybrid address translation
                     scheme},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {40:1--40:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721706},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In NAND flash-based storage systems, an intermediate
                     software layer called a Flash Translation Layer (FTL)
                     is usually employed to hide the erase-before-write
                     characteristics of NAND flash memory. We propose a
                     novel superblock-based FTL scheme, which combines a set
                     of adjacent logical blocks into a superblock. In the
                     proposed Superblock FTL, superblocks are mapped at
                     coarse granularity, while pages inside the superblock
                     are mapped freely at fine granularity to any location
                     in several physical blocks. To reduce extra storage and
                     flash memory operations, the fine-grain mapping
                     information is stored in the spare area of NAND flash
                     memory. This hybrid address translation scheme has the
                     flexibility provided by fine-grain address translation,
                     while reducing the memory overhead to the level of
                     coarse-grain address translation. Our experimental
                     results show that the proposed FTL scheme significantly
                     outperforms previous block-mapped FTL schemes with
                     roughly the same memory overhead.},
  acknowledgement = ack-nhfb,
  articleno       = {40},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {FTL; hybrid address translation; NAND flash memory;
                     storage system},
}

@article{Klues:2010:LLD,
  author          = {Klues, Kevin and Xing, Guoliang and Lu, Chenyang},
  title           = {Link layer driver architecture for unified radio power
                     management in wireless sensor networks},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {41:1--41:??},
  month           = mar,
  year            = {2010},
  coden           = {????},
  doi             = {http://doi.acm.org/10.1145/1721695.1721707},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Wireless Sensor Networks (WSNs) represent a new
                     generation of networked embedded systems that must
                     achieve long lifetimes on scarce amounts of energy.
                     Since radio communication accounts for the primary
                     source of power drain in these networks, a large number
                     of different radio power management protocols have been
                     proposed. However, the lack of operating system support
                     for flexibly integrating them with a diverse set of
                     applications and network platforms has made them
                     difficult to use. This article focuses on providing
                     link layer support toward realizing a unified power
                     management architecture (UPMA) for WSNs. In contrast to
                     existing monolithic approaches, we provide (i) a set of
                     standard interfaces that separate link layer power
                     management protocols from common MAC level
                     functionality, (ii) an architectural framework that
                     allows applications to easily swap out different
                     power-management protocols depending on its needs, and
                     (iii) a mechanism for coordinating multiple
                     applications with different power management
                     requirements. We have implemented our approach on both
                     the Mica2 and Telosb radio drivers in TinyOS-2.0, the
                     second generation of the de facto standard operating
                     system for WSNs. Microbenchmark results show that our
                     approach can coordinate the power-management
                     requirements of multiple applications in a platform
                     independent fashion while incurring negligible
                     overhead.},
  acknowledgement = ack-nhfb,
  articleno       = {41},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {architecture; framework; radio power management;
                     Wireless sensor networks},
}

@Article{Lee:2010:IHM,
  author =       "Jupyung Lee and Kyu Ho Park",
  title =        "Interrupt handler migration and direct interrupt
                 scheduling for rapid scheduling of interrupt-driven
                 tasks",
  journal =      j-TECS,
  volume =       "9",
  number =       "4",
  pages =        "42:1--42:??",
  month =        mar,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1721695.1721708",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Apr 2 17:12:34 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we propose two techniques that aim to
                 minimize the scheduling latency of high-priority
                 interrupt-driven tasks, named the Interrupt Handler
                 Migration (IHM) and Direct Interrupt Scheduling (DIS).
                 The IHM allows the interrupt handler to be migrated
                 from the interrupt handler thread to the corresponding
                 target process so that additional context switch can be
                 avoided and the cache hit ratio with respect to the
                 data generated by the interrupt handler can be
                 improved. In addition, the DIS allows the shortest path
                 reserved for urgent interrupt-process pairs to be laid
                 between the interrupt arrival and target process by
                 dividing a series of interrupt-driven operations into
                 nondeferrable and deferrable operations. Both the IHM
                 and DIS can be combined in a natural way and can
                 operate concurrently. These techniques can be applied
                 to all kinds of interrupt handlers with no modification
                 to them. The proposed techniques not only reduce the
                 scheduling latency, but also resolve the
                 interrupt-driven priority inversion problem.\par

                 We implemented a prototype in the Linux 2.6.19 kernel
                 after adding real-time patches. Experimental results
                 show that the scheduling latency is significantly
                 reduced by up to 84.2\% when both techniques are
                 applied together. When the Linux OS runs on an
                 ARM-based embedded CPU running at 200 MHz, the
                 scheduling latency can become as low as 30 $\mu$s, which
                 is much closer to the hardware-specific limitations. By
                 lowering the scheduling latency, the limited CPU cycles
                 can be consumed more for user-level processes and less
                 for system-level tasks, such as interrupt handling and
                 scheduling.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "interrupt handling; latency; Linux; Real-time
                 operating system; responsiveness; scheduling",
}

@Article{Tan:2010:MSE,
  author          = {Chiu C. Tan and Bo Sheng and Haodong Wang and Qun
                     Li},
  title           = {{Microsearch}: a search engine for embedded devices
                     used in pervasive computing},
  journal         = j-TECS,
  volume          = {9},
  number          = {4},
  pages           = {43:1--43:??},
  month           = mar,
  year            = {2010},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1721695.1721709},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Fri Apr 2 17:12:34 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we present Microsearch, a search
                     system suitable for embedded devices used in ubiquitous
                     computing environments. Akin to a desktop search
                     engine, Microsearch indexes the information inside a
                     small device, and accurately resolves a user's queries.
                     Given the limited hardware, conventional search engine
                     design and algorithms cannot be used. We adopt
                     Information Retrieval (IR) techniques for query
                     resolution, and proposed a new space-efficient top-$k$
                     query resolution algorithm. A theoretical model of
                     Microsearch is given to better understand the
                     trade-offs in design parameters. Evaluation is done via
                     actual implementation on off-the-shelf hardware.},
  acknowledgement = ack-nhfb,
  articleno       = {43},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {Embedded search engine; information retrieval;
                     pervasive computing},
}

@Article{Higuera-Toledano:2010:ISI,
  author          = {M. Teresa Higuera-Toledano and Doug Locke and Angelo
                     Corsaro},
  title           = {Introduction to special issue on {Java} technologies
                     for real-time and embedded systems},
  journal         = j-TECS,
  volume          = {10},
  number          = {1},
  pages           = {1:1--1:??},
  month           = aug,
  year            = {2010},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1814539.1814540},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Aug 30 15:29:45 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  articleno       = {1},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{DosSantos:2010:MPB,
  author =       "Osmar Marchi {Dos Santos} and Andy Wellings",
  title =        "Measuring and policing blocking times in real-time
                 systems",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "2:1--2:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1814539.1814541",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In real-time systems, the execution-time overrun of a
                 thread may lead to a deadline being missed by the
                 thread or even other threads in the system. From a
                 fault tolerance perspective, both execution time
                 overruns and deadline misses can be considered timing
                 errors that could potentially cause a failure in the
                 system's ability to deliver its services in a timely
                 manner. In this context, the ideal is to detect the
                 error in the system as soon as possible, so that the
                 propagation of the error can be limited and error
                 recovery strategies can take place with more accurate
                 information. The run-time support mechanism usually
                 deployed for monitoring the timing requirements of
                 real-time systems is based on deadline monitoring, that
                 is, the system calls specific application code whenever
                 a deadline is violated. Recognizing that deadline
                 monitoring may not be enough for providing an adequate
                 level of fault tolerance for timing errors, major
                 real-time programming standards, like Ada, POSIX and
                 the Real-Time Specification for Java (RTSJ), have
                 proposed different mechanisms for monitoring the
                 execution time of threads. Nevertheless, in order to
                 provide a complete fault tolerance approach for timing
                 errors, the potential blocking time of threads also has
                 to be monitored. In this article, we propose mechanisms
                 for measuring and policing the blocking time of threads
                 in the context of both {\em basic priority
                 inheritance\/} and {\em priority ceiling protocols}.
                 The notion of {\em blocking-time clocks and timers\/}
                 for the POSIX standard is proposed, implemented and
                 evaluated in the open-source real-time operating system
                 MaRTE OS. Also, a {\em blocking time monitoring
                 model\/} for measuring and policing blocking times in
                 the RTSJ framework is specified. This model is
                 implemented and evaluated in the (RTSJ-compliant)
                 open-source middleware jRate, running on top of MaRTE
                 OS.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "error detection; POSIX standard; Real-time
                 specification for Java; timing errors",
}

@Article{Zerzelidis:2010:FFS,
  author =       "Alexandros Zerzelidis and Andy Wellings",
  title =        "A framework for flexible scheduling in the {RTSJ}",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "3:1--3:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1814539.1814542",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a viable solution to introducing
                 flexible scheduling in the Real-Time Specification for
                 Java (RTSJ), in the form of a flexible scheduling
                 framework. The framework allows the concurrent use of
                 multiple application-defined scheduling policies, each
                 scheduling a subset of the total set of threads.
                 Moreover, all threads, regardless of the policy under
                 which they are scheduled, are permitted to share common
                 resources. Thus, the framework can accommodate a
                 variety of interworking applications (soft, firm, and
                 hard) running under the RTSJ. The proposed approach is
                 a two-level scheduling framework, where the first level
                 is the RTSJ priority scheduler and the second level is
                 under application control. This article describes the
                 framework's protocol, examines the different types of
                 scheduling policies that can be supported, and
                 evaluates the proposed framework by measuring its
                 execution cost. A description of an application-defined
                 Earliest-Deadline-First (EDF) scheduler illustrates how
                 the interface can be used. Minimum backward-compatible
                 changes to the RTSJ specification are discussed to
                 motivate the required interface. The only assumptions
                 made about the underlying real-time operating system are
                 that it supports preemptive priority-based dispatching
                 of threads and that changes to priorities have
                 immediate effect.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "flexible scheduling; RTSJ; Scheduling framework",
}

@Article{Spring:2010:RAI,
  author =       "Jesper Honig Spring and Filip Pizlo and Jean Privat
                 and Rachid Guerraoui and Jan Vitek",
  title =        "{Reflexes}: {Abstractions} for integrating highly
                 responsive tasks into {Java} applications",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "4:1--4:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1814539.1814543",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Achieving submillisecond response times in a managed
                 language environment such as Java or C\# requires
                 overcoming significant challenges. In this article, we
                 propose Reflexes, a programming model and runtime
                 system infrastructure that lets developers seamlessly
                 mix highly responsive tasks and timing-oblivious Java
                 applications. Thus enabling gradual addition of
                 real-time features, to a non-real-time application
                 without having to resort to recoding the real-time
                 parts in a different language such as C or Ada.
                 Experiments with the Reflex prototype implementation
                 show that it is possible to run a real-time task with a
                 period of 45 $\mu$s with an accuracy of 99.996\% (only
                 0.001\% worse than the corresponding C implementation)
                 in the presence of garbage collection and heavy load
                 ordinary Java threads.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Java virtual machine; memory management; Real-time
                 systems",
}

@Article{Kim:2010:EAE,
  author          = {Minseong Kim and Andy Wellings},
  title           = {Efficient asynchronous event handling in the real-time
                     specification for {Java}},
  journal         = j-TECS,
  volume          = {10},
  number          = {1},
  pages           = {5:1--5:??},
  month           = aug,
  year            = {2010},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1814539.1814544},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Aug 30 15:29:45 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The Real-Time Specification for Java (RTSJ) is
                     becoming mature. It has been implemented, formed the
                     basis for research and used in serious applications.
                     Some strengths and weaknesses are emerging. One of the
                     areas that requires further elaboration is asynchronous
                     event handling (AEH). The primary goal for handlers in
                     the RTSJ is to have a lightweight concurrency
                     mechanism. Some implementation will, however, simply
                     map a handler to a real-time thread and this results in
                     undermining the original motivations and introduces
                     performance penalties. However it is generally unclear
                     how to map handlers to real-time threads effectively.
                     Also the support for nonblocking handlers in the RTSJ
                     is criticized as lacking in configurability as
                     implementations are unable to take advantage of them.
                     This article, therefore, examines the AEH techniques
                     used in some popular RTSJ implementations and proposes
                     two efficient AEH models for the RTSJ. We then define
                     formal models of the RTSJ AEH implementations using the
                     automata formalism provided by the UPPAAL model
                     checking tool. Using the automata models, their
                     properties are explored and verified. In the proposed
                     models, blocking and nonblocking handlers are serviced
                     by different algorithms. In this way, it is possible to
                     assign a real-time thread to a handler at the right
                     time in the right place while maintaining the fewest
                     possible threads overall and to give a certain level of
                     configurability to AEH. We also have implemented the
                     proposed models on an existing RTSJ implementation,
                     jRate and executed a set of performance tests that
                     measure their respective dispatch and multiple-handler
                     completion latencies. The results from the tests and
                     the verifications indicate that the proposed models
                     require fewer threads on average with better
                     performance than other approaches.},
  acknowledgement = ack-nhfb,
  articleno       = {5},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {asynchronous event handling; blocking handler;
                     multiple-server switching phenomenon; nonblocking
                     handler; RTSJ},
}

@Article{Schoeberl:2010:NRT,
  author          = {Martin Schoeberl and Wolfgang Puffitsch},
  title           = {Nonblocking real-time garbage collection},
  journal         = j-TECS,
  volume          = {10},
  number          = {1},
  pages           = {6:1--6:??},
  month           = aug,
  year            = {2010},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1814539.1814545},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Aug 30 15:29:45 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {A real-time garbage collector has to fulfill two basic
                     properties: ensure that programs with bounded
                     allocation rates do not run out of memory and provide
                     short blocking times. Even for incremental garbage
                     collectors, two major sources of blocking exist,
                     namely, root scanning and heap compaction. Finding root
                     nodes of an object graph is an integral part of tracing
                     garbage collectors and cannot be circumvented. Heap
                     compaction is necessary to avoid probably unbounded
                     heap fragmentation, which in turn would lead to
                     unacceptably high memory consumption. In this article,
                     we propose solutions to both issues.\par

                     Thread stacks are local to a thread, and root scanning,
                     therefore, only needs to be atomic with respect to the
                     thread whose stack is scanned. This fact can be
                     utilized by either blocking only the thread whose stack
                     is scanned, or by delegating the responsibility for
                     root scanning to the application threads. The latter
                     solution eliminates blocking due to root scanning
                     completely. The impact of this solution on the
                     execution time of a garbage collector is shown for two
                     different variants of such a root scanning
                     algorithm.\par

                     During heap compaction, objects are copied. Copying is
                     usually performed atomically to avoid interference with
                     application threads, which could render the state of an
                     object inconsistent. Copying of large objects and
                     especially large arrays introduces long blocking times
                     that are unacceptable for real-time systems. In this
                     article, an interruptible copy unit is presented that
                     implements nonblocking object copy. The unit can be
                     interrupted after a single word move.\par

                     We evaluate a real-time garbage collector that uses the
                     proposed techniques on a Java processor. With this
                     garbage collector, it is possible to run high-priority
                     hard real-time tasks at 10 kHz parallel to the garbage
                     collection task on a 100 MHz system.},
  acknowledgement = ack-nhfb,
  articleno       = {6},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {Garbage collection; nonblocking copying; real-time;
                     root scanning},
}

@Article{Basanta-Val:2010:NHR,
  author          = {Pablo Basanta-Val and Marisol Garc{\'\i}a-Valls and
                     Iria Est{\'e}vez-Ayres},
  title           = {{No-Heap Remote Objects} for distributed real-time
                     {Java}},
  journal         = j-TECS,
  volume          = {10},
  number          = {1},
  pages           = {7:1--7:??},
  month           = aug,
  year            = {2010},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1814539.1814546},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Aug 30 15:29:45 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article presents an approach to providing
                     real-time support for Java's Remote Method Invocation
                     (RMI) and its integration with the RTSJ memory model in
                     order to leave out garbage collection. A new construct
                     for remote objects, called {\em No-heap Remote
                     object\/} ({\em NhRo\/}), is introduced. The use of a
                     NhRo guarantees that memory required to perform a
                     remote invocation (at the server side) does not use
                     heap memory. Thus, the aim is to avoid garbage
                     collection in the remote invocation process, improving
                     predictability and memory isolation of distributed
                     Java-based real-time applications. The article presents
                     the bare model and the main programming patterns that
                     are associated with the NhRo model. Sun RMI
                     implementation has been modified to integrate the NhRo
                     model in both static and dynamic environments.},
  acknowledgement = ack-nhfb,
  articleno       = {7},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords        = {distributed real-time Java; DRTSJ; Real-time Java;
                     real-time remote objects; region-based memory
                     management; RTSJ},
}

@Article{Curley:2010:RDT,
  author =       "Edward Curley and Binoy Ravindran and Jonathan
                 Anderson and E. Douglas Jensen",
  title =        "Recovering from distributable thread failures in
                 distributed real-time {Java}",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1814539.1814547",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We consider the problem of recovering from the
                 failures of distributable threads (``threads'') in
                 distributed real-time systems that operate under
                 runtime uncertainties including those on thread
                 execution times, thread arrivals, and node failure
                 occurrences. When a thread experiences a node failure,
                 the result is a broken thread having an orphan. Under a
                 termination model, the orphans must be detected and
                 aborted, and exceptions must be delivered to the
                 farthest, contiguous surviving thread segment for
                 resuming thread execution. Our application/scheduling
                 model includes the proposed distributable thread
                 programming model for the emerging Distributed
                 Real-Time Specification for Java (DRTSJ), together with
                 an exception-handler model. Threads are subject to
                 time/utility function (TUF) time constraints and a
                 utility accrual (UA) optimality criterion. A key
                 underpinning of the TUF/UA scheduling paradigm is the
                 notion of ``best-effort'' where higher importance
                 threads are always favored over lower importance ones,
                 irrespective of thread urgency as specified by their
                 time constraints. We present a thread scheduling
                 algorithm called HUA and a thread integrity protocol
                 called TPR. We show that HUA and TPR bound the orphan
                 cleanup and recovery time with bounded loss of the
                 best-effort property. Our implementation experience for
                 HUA/TPR in the Reference Implementation of the proposed
                 programming model for the DRTSJ demonstrates the
                 algorithm/protocol's effectiveness.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "distributable thread; Distributed; distributed
                 scheduling; Java; real-time; thread integrity",
}

@Article{Pitter:2010:RTJ,
  author =       "Christof Pitter and Martin Schoeberl",
  title =        "A real-time {Java} chip-multiprocessor",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1814539.1814548",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Chip-multiprocessors are an emerging trend for
                 embedded systems. In this article, we introduce a
                 real-time Java multiprocessor called JopCMP. It is a
                 symmetric shared-memory multiprocessor, and consists of
                 up to eight Java Optimized Processor (JOP) cores, an
                 arbitration control device, and a shared memory. All
                 components are interconnected via a system on chip bus.
                 The arbiter synchronizes the access of multiple CPUs to
                 the shared main memory. In this article, three
                 different arbitration policies are presented,
                 evaluated, and compared with respect to their real-time
                 and average-case performance: a fixed priority, a
                 fair-based, and a time-sliced arbiter.\par

                 Tasks running on different CPUs of a
                 chip-multiprocessor (CMP) influence each other's
                 execution times when accessing a shared memory.
                 Therefore, the system needs an arbiter that is able to
                 limit the worst-case execution time of a task running
                 on a CPU, even though tasks executing simultaneously on
                 other CPUs access the main memory. Our research shows
                 that timing analysis is in fact possible for
                 homogeneous multiprocessor systems with a shared
                 memory. The timing analysis of tasks, executing on the
                 CMP using time-sliced memory arbitration, leads to
                 viable worst-case execution time bounds.\par

                 The time-sliced arbiter divides the memory access time
                 into equal time slots, one time slot for each CPU. This
                 memory arbitration scheme allows for a calculation of
                 upper bounds of Java application worst-case execution
                 times, depending on the number of CPUs, the time slot
                 size, and the memory access time. Examples of
                 worst-case execution time calculation are presented,
                 and the analyzed results of a real-world application
                 task are compared to measured execution time results.
                 Finally, we evaluate the tradeoffs when using a
                 time-predictable solution compared to using
                 average-case optimized chip-multiprocessors, applying
                 three different benchmarks. These experiments are
                 carried out by executing the programs on the CMP
                 prototype.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Java processor; multiprocessor; Real-time system;
                 shared memory; worst-case execution time",
}

@Article{Kaiser:2010:ISI,
  author          = {William Kaiser and Majid Sarrafzadeh},
  title           = {Introduction to special issue on wireless health},
  journal         = j-TECS,
  volume          = {10},
  number          = {1},
  pages           = {10:1--10:??},
  month           = aug,
  year            = {2010},
  CODEN           = {????},
  DOI             = {http://doi.acm.org/10.1145/1814539.1814549},
  ISSN            = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L          = {1539-9087},
  bibdate         = {Mon Aug 30 15:29:45 MDT 2010},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  articleno       = {10},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Ko:2010:MME,
  author =       "Jeonggil Ko and Jong Hyun Lim and Yin Chen and
                 R{\u{a}}zvan Mus{\u{a}}loiu-E. and Andreas Terzis and
                 Gerald M. Masson and Tia Gao and Walt Destler and Leo
                 Selavo and Richard P. Dutton",
  title =        "{MEDiSN}: {Medical} emergency detection in sensor
                 networks",
  journal =      j-TECS,
  volume =       "10",
  number =       "1",
  pages =        "11:1--11:??",
  month =        aug,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1814539.1814550",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Aug 30 15:29:45 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Staff shortages and an increasingly aging population
                 are straining the ability of emergency departments to
                 provide high quality care. At the same time, there is a
                 growing concern about hospitals' ability to provide
                 effective care during disaster events. For these
                 reasons, tools that automate patient monitoring have
                 the potential to greatly improve efficiency and quality
                 of health care. Towards this goal, we have developed
                 {\em MEDiSN}, a wireless sensor network for monitoring
                 patients' physiological data in hospitals and during
                 disaster events. MEDiSN comprises {\em Physiological
                 Monitors\/} (PMs), which are custom-built, patient-worn
                 motes that sample, encrypt, and sign physiological data
                 and {\em Relay Points\/} (RPs) that self-organize into
                 a multi-hop wireless backbone for carrying
                 physiological data. Moreover, MEDiSN includes a
                 back-end server that persistently stores medical data
                 and presents them to authenticated GUI clients. The
                 combination of MEDiSN's two-tier architecture and
                 optimized rate control protocols allows it to address
                 the compound challenge of reliably delivering large
                 volumes of data while meeting the application's QoS
                 requirements. Results from extensive simulations,
                 testbed experiments, and multiple pilot hospital
                 deployments show that MEDiSN can scale from tens to at
                 least five hundred PMs, effectively protect application
                 packets from congestive and corruptive losses, and
                 deliver medically actionable data.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
  keywords =     "Medical sensor networks; wireless physiological
                 monitoring",
}

@Article{Coronato:2010:FSW,
  author = {Antonio Coronato and Giuseppe {De Pietro}},
  title = {Formal specification of wireless and pervasive healthcare
    applications},
  journal = j-TECS,
  volume = {10},
  number = {1},
  pages = {12:1--12:??},
  month = aug,
  year = {2010},
  CODEN = {????},
  DOI = {http://doi.acm.org/10.1145/1814539.1814551},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Aug 30 15:29:45 MDT 2010},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Wireless and pervasive healthcare applications typically
    present critical requirements from the point of view of functional
    correctness, reliability, availability, security, and safety. In
    contrast to the case of classic safety critical applications, the
    behavior of wireless and pervasive applications is affected by the
    movements and location of users and resources.\par

    This article presents a methodology to formally express requirements
    in safety critical wireless and pervasive healthcare applications in
    order to achieve a higher degree of dependability. In particular, it
    will be shown how it is possible to formalize and constrict mobility
    characteristics by combining, and in some cases extending, several
    formal methods. The article also describes a rigorous specification
    process. Finally, it concludes with a case study of a real safety
    critical pervasive healthcare application that is going to be deployed
    in a city hospital.},
  acknowledgement = ack-nhfb,
  articleno = {12},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {Formal specification; methodologies and tools; wireless and
    pervasive healthcare applications},
}

@Article{Waluyo:2010:MMB,
  author = {Agustinus Borgy Waluyo and Wee-Soon Yeoh and Isaac Pek and
    Yihan Yong and Xiang Chen},
  title = {{MobiSense}: {Mobile} body sensor network for ambulatory
    monitoring},
  journal = j-TECS,
  volume = {10},
  number = {1},
  pages = {13:1--13:??},
  month = aug,
  year = {2010},
  CODEN = {????},
  DOI = {http://doi.acm.org/10.1145/1814539.1814552},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Aug 30 15:29:45 MDT 2010},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article introduces MobiSense, a novel mobile health
    monitoring system for ambulatory patients. MobiSense resides in a
    mobile device, communicates with a set of body sensor devices attached
    to the wearer, and processes data from these sensors. MobiSense is able
    to detect body postures such as lying, sitting, and standing, and
    walking speed, by utilizing our rule-based heuristic activity
    classification scheme based on the extended Kalman (EK) Filtering
    algorithm. Furthermore, the proposed system is capable of controlling
    each of the sensor devices, and performing resource reconfiguration and
    management schemes (sensor sleep/wake-up mode). The architecture of
    MobiSense is highlighted and discussed in depth. The system has been
    implemented, and its prototype is showcased. We have also carried out
    rigorous performance measurements of the system including real-time and
    query latency as well as the power consumption of the sensor nodes. The
    accuracy of our activity classifier scheme has been evaluated by
    involving several human subjects, and we found promising results.},
  acknowledgement = ack-nhfb,
  articleno = {13},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {ambulatory patient monitoring; pervasive healthcare; wireless
    body sensor network; Wireless health system},
}

@Article{Quwaider:2010:TPA,
  author = {Muhannad Quwaider and Jayanthi Rao and Subir Biswas},
  title = {Transmission power assignment with postural position inference
    for on-body wireless communication links},
  journal = j-TECS,
  volume = {10},
  number = {1},
  pages = {14:1--14:??},
  month = aug,
  year = {2010},
  CODEN = {????},
  DOI = {http://doi.acm.org/10.1145/1814539.1814553},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Aug 30 15:29:45 MDT 2010},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article presents a novel transmission power assignment
    mechanism for on-body wireless links formed between severely
    energy-constrained wearable and implanted sensors. The key idea is to
    develop a measurement-based framework in which the postural position as
    it pertains to a given wireless link is first inferred based on the
    measured RF signal strength and packet drops. Then optimal power
    assignment is done by fitting those measurement results into a model
    describing the relationship between the assigned power and the
    resulting signal strength. A closed loop power control mechanism is
    then added for iterative convergence to the optimal power level as a
    response to both intra-and-inter posture body movements. This provides
    a practical paradigm for on-body power assignment, which cannot
    leverage the existing mechanisms in the literature that rely on
    localization, which is not realistic for on-body sensors. Extensive
    experimental results are provided to demonstrate the model building and
    algorithm performance on a prototype body area network. The proposed
    mechanism has also been compared with a number of other closed loop
    mechanisms and an experimental benchmark.},
  acknowledgement = ack-nhfb,
  articleno = {14},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  keywords = {adaptive power control; Body area network; link quality
    measurement; radio link quality},
}

@Article{Basten:2010:EMD,
  author = {Twan Basten and Rolf Ernst},
  title = {Editorial: {Model-driven} embedded-system design},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {15:1--15:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880051},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  articleno = {15},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Geilen:2010:SDS,
  author = {Marc Geilen},
  title = {Synchronous dataflow scenarios},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {16:1--16:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880052},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The Synchronous Dataflow (SDF) model of computation by Lee
    and Messerschmitt has become popular for modeling concurrent
    applications on a multiprocessor platform. It is used to obtain a
    guaranteed, predictable performance. The model, on the other hand, is
    quite restrictive in its expressivity, making it less applicable to
    many modern, more dynamic applications. A common technique to deal with
    dynamic behavior is to consider different scenarios in separation. This
    analysis is, however, currently limited mainly to sequential
    applications. In this article, we present a new analysis approach that
    allows analysis of synchronous dataflow models across different
    scenarios of operation. The dataflow graphs corresponding to the
    different scenarios can be completely different.},
  acknowledgement = ack-nhfb,
  articleno = {16},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Wiggers:2010:BCC,
  author = {Maarten H. Wiggers and Marco J. G. Bekooij and Gerard J. M.
    Smit},
  title = {Buffer capacity computation for throughput-constrained modal
    task graphs},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {17:1--17:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880053},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Increasingly, stream-processing applications include complex
    control structures to better adapt to changing conditions in their
    environment. This adaptivity often results in task execution rates that
    are dependent on the processed stream. Current approaches to compute
    buffer capacities that are sufficient to satisfy a throughput
    constraint have limited applicability in case of data-dependent task
    execution rates. In this article, we present a dataflow model that
    allows tasks to have loops with an unbounded number of iterations. For
    instances of this dataflow model, we present efficient checks on their
    validity. Furthermore, we present an efficient algorithm to compute
    buffer capacities that are sufficient to satisfy a throughput
    constraint.},
  acknowledgement = ack-nhfb,
  articleno = {17},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Falk:2010:ASA,
  author = {Joachim Falk and Christian Zebelein and Joachim Keinert and
    Christian Haubelt and Juergen Teich and Shuvra S. Bhattacharyya},
  title = {Analysis of {SystemC} actor networks for efficient synthesis},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {18:1--18:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880054},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Applications in the signal processing domain are often
    modeled by dataflow graphs. Due to heterogeneous complexity
    requirements, these graphs contain both dynamic and static dataflow
    actors. In previous work, we presented a generalized clustering
    approach for these heterogeneous dataflow graphs in the presence of
    unbounded buffers. This clustering approach allows the application of
    static scheduling methodologies for static parts of an application
    during embedded software generation for multiprocessor systems. It
    systematically exploits the predictability and efficiency of the static
    dataflow model to obtain latency and throughput improvements. In this
    article, we present a generalization of this clustering technique to
    dataflow graphs with bounded buffers, therefore enabling synthesis for
    embedded systems without dynamic memory allocation.},
  acknowledgement = ack-nhfb,
  articleno = {18},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Razavi:2010:SAB,
  author = {Niloofar Razavi and Razieh Behjati and Hamideh Sabouri and
    Ehsan Khamespanah and Amin Shali and Marjan Sirjani},
  title = {{Sysfier}: {Actor-based} formal verification of {SystemC}},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {19:1--19:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880055},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {SystemC is a system-level modeling language that can be used
    effectively for hardware/software co-design. Since a major goal of
    SystemC is to enable verification at higher levels of abstraction, the
    tendency is now directing to introducing formal verification approaches
    for SystemC. In this article, we propose an approach for formal
    verification of SystemC designs, and provide the semantics of SystemC
    using Labeled Transition Systems (LTS) for this purpose. An actor-based
    language, Rebeca, is used as an intermediate language. SystemC designs
    are mapped to Rebeca models and then Rebeca verification toolset is
    used to verify LTL and CTL properties. To tackle the state-space
    explosion, Rebeca model checkers offer some reduction policies that
    make them appropriate for SystemC verification.},
  acknowledgement = ack-nhfb,
  articleno = {19},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Adler:2010:CBM,
  author = {Rasmus Adler and Ina Schaefer and Mario Trapp and Arnd
    Poetzsch-Heffter},
  title = {Component-based modeling and verification of dynamic adaptation
    in safety-critical embedded systems},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {20:1--20:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880056},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Adaptation is increasingly used in the development of
    safety-critical embedded systems, in particular to reduce hardware
    needs and to increase availability. However, composing a system from
    many reconfigurable components can lead to a huge number of possible
    system configurations, inducing a complexity that cannot be handled
    during system design. To overcome this problem, we propose a new
    component-based modeling and verification method for adaptive embedded
    systems. The component-based modeling approach facilitates abstracting
    a composition of components to a hierarchical component. In the
    hierarchical component, the number of possible configurations of the
    composition is reduced to a small number of hierarchical
    configurations.},
  acknowledgement = ack-nhfb,
  articleno = {20},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Driver:2010:MES,
  author = {Cormac Driver and Sean Reilly and {\'E}amonn Linehan and Vinny
    Cahill and Siobh{\'a}n Clarke},
  title = {Managing embedded systems complexity with aspect-oriented
    model-driven engineering},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {21:1--21:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880057},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Model-driven engineering addresses issues of platform
    heterogeneity and code quality through the use of high-level system
    models and subsequent automatic transformations. Adoption of the
    model-driven software engineering paradigm for embedded systems
    necessitates specification of appropriate models of often complex
    systems. Modern embedded systems are typically composed of multiple
    functional and nonfunctional concerns, with the nonfunctional concerns
    (e.g., timing and performance) typically affecting the design and
    implementation of the functional concerns. The presence of crosscutting
    concerns makes specification of adequate platform-independent models a
    significant challenge. Aspect-oriented software development is a
    separation of concerns technique that decomposes systems into distinct
    features with minimal overlap.},
  acknowledgement = ack-nhfb,
  articleno = {21},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Schliecker:2010:RTP,
  author = {Simon Schliecker and Rolf Ernst},
  title = {Real-time performance analysis of multiprocessor systems with
    shared memory},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {22:1--22:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880058},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Predicting timing behavior is key to reliable real-time
    system design and verification, but becomes increasingly difficult for
    current multiprocessor systems on chip. The integration of formerly
    separate functionality into a single multicore system introduces new
    intercore timing dependencies resulting from the common use of the now
    shared resources. This feedback of system timing on local timing makes
    traditional performance analysis approaches inappropriate. This article
    presents a general methodology to model the shared resource traffic and
    consider its effect on the local task execution. The aggregate busy
    time captures the timing of multiple accesses to a shared memory far
    better than the traditional models that focus on the timing of
    individual events.},
  acknowledgement = ack-nhfb,
  articleno = {22},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Seo:2010:DAS,
  author = {Euiseong Seo and Sangwon Kim and Seonyeong Park and Joonwon
    Lee},
  title = {Dynamic alteration schemes of real-time schedules for {I/O}
    device energy efficiency},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {23:1--23:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880059},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Many I/O devices provide multiple power states known as the
    dynamic power management (DPM) feature. However, activating from sleep
    state requires significant transition time and this obstructs utilizing
    DPM in nonpreemptive real-time systems. This article suggests
    nonpreemptive real-time task scheduling schemes maximizing the
    effectiveness of the I/O device DPM support. First, we introduce a
    runtime schedulability check algorithm for nonpreemptive real-time
    systems that can check whether a modification from a valid schedule is
    still valid. By using this, we suggest three heuristic algorithms. The
    first algorithm reorders the execution sequence of tasks according to
    the similarity of their required device sets.},
  acknowledgement = ack-nhfb,
  articleno = {23},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Cabodi:2010:BSF,
  author = {Gianpiero Cabodi and Marco Murciano and Massimo Violante},
  title = {Boosting software fault injection for dependability analysis of
    real-time embedded applications},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {24:1--24:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880060},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The design of complex embedded systems deployed in
    safety-critical or mission-critical applications mandates the
    availability of methods to validate the system dependability across the
    whole design flow. In this article we introduce a fault injection
    approach, based on loadable kernel modules and running under the Linux
    operating system, which can be adopted as soon as a running prototype
    of the systems is available. Moreover, for the purpose of decoupling
    dependability analysis from hardware availability, we also propose the
    adoption of hardware virtualization.},
  acknowledgement = ack-nhfb,
  articleno = {24},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Mohan:2010:PTA,
  author = {Sibin Mohan and Frank Mueller and Michael Root and William
    Hawkins and Christopher Healy and David Whalley and Emilio Vivancos},
  title = {Parametric timing analysis and its application to dynamic
    voltage scaling},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {25:1--25:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880061},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Embedded systems with real-time constraints depend on a
    priori knowledge of worst-case execution times (WCETs) to determine if
    tasks meet deadlines. Static timing analysis derives bounds on WCETs
    but requires statically known loop bounds. This work removes the
    constraint on known loop bounds through parametric analysis expressing
    WCETs as functions. Tighter WCETs are dynamically discovered to exploit
    slack by dynamic voltage scaling (DVS) saving 60\% to 82\% energy over
    DVS-oblivious techniques and showing savings close to more costly
    dynamic-priority DVS algorithms.},
  acknowledgement = ack-nhfb,
  articleno = {25},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Zhu:2010:RAD,
  author = {Dakai Zhu},
  title = {Reliability-aware dynamic energy management in dependable
    embedded real-time systems},
  journal = j-TECS,
  volume = {10},
  number = {2},
  pages = {26:1--26:??},
  month = dec,
  year = {2010},
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/1880050.1880062},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Jan 10 09:44:12 MST 2011},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Recent studies show that voltage scaling, which is an
    efficient energy management technique, has a direct and negative effect
    on system reliability because of the increased rate of transient faults
    (e.g., those induced by cosmic particles). In this article, we propose
    energy management schemes that explicitly take system reliability into
    consideration. The proposed reliability-aware energy management schemes
    dynamically schedule recoveries for tasks to be scaled down to
    recuperate the reliability loss due to energy management. Based on the
    amount of available slack, the application size, and the fault rate
    changes, we analyze when it is profitable to reclaim the slack for
    energy savings without sacrificing system reliability.},
  acknowledgement = ack-nhfb,
  articleno = {26},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Ramaprasad:2010:TBF,
  author          = {Harini Ramaprasad and Frank Mueller},
  title           = {Tightening the bounds on feasible preemptions},
  journal         = j-TECS,
  volume          = {10},
  number          = {2},
  pages           = {27:1--27:??},
  month           = dec,
  year            = {2010},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/1880050.1880063},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Jan 10 09:44:12 MST 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Data caches are an increasingly important
                     architectural feature in most modern computer systems.
                     They help bridge the gap between processor speeds and
                     memory access times. One inherent difficulty of using
                     data caches in a real-time system is the
                     unpredictability of memory accesses, which makes it
                     difficult to calculate worst-case execution times
                     (WCETs) of real-time tasks. While cache analysis for
                     single real-time tasks has been the focus of much
                     research in the past, bounding the preemption delay in
                     a multitask preemptive environment is a challenging
                     problem, particularly for data caches. This article
                     makes multiple contributions in the context of
                     independent, periodic tasks with deadlines less than or
                     equal to their periods executing on a single
                     processor.},
  acknowledgement = ack-nhfb,
  articleno       = {27},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Li:2010:SMA,
  author          = {Lian Li and Jingling Xue and Jens Knoop},
  title           = {Scratchpad memory allocation for data aggregates via
                     interval coloring in superperfect graphs},
  journal         = j-TECS,
  volume          = {10},
  number          = {2},
  pages           = {28:1--28:??},
  month           = dec,
  year            = {2010},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/1880050.1880064},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Jan 10 09:44:12 MST 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Existing methods place data or code in scratchpad
                     memory (SPM) by relying on heuristics or resorting to
                     integer programming or mapping it to a graph-coloring
                     problem. In this article, the SPM allocation problem
                     for arrays is formulated as an interval coloring
                     problem. The key observation is that in many embedded C
                     programs, two arrays can be modeled such that either
                     their live ranges do not interfere or one contains the
                     other (with good accuracy). As a result, array
                     interference graphs often form a special class of
                     superperfect graphs (known as comparability graphs),
                     and their optimal interval colorings become efficiently
                     solvable. This insight has led to the development of an
                     SPM allocation algorithm that places arrays in an
                     interference graph in SPM by examining its maximal
                     cliques.},
  acknowledgement = ack-nhfb,
  articleno       = {28},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Singh:2010:CPD,
  author          = {Montek Singh and Steven M. Nowick},
  title           = {Call for papers: {Deadline: March 15, 2011}},
  journal         = j-TECS,
  volume          = {10},
  number          = {2},
  pages           = {29:1--29:??},
  month           = dec,
  year            = {2010},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/1880050.1880065},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Jan 10 09:44:12 MST 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  articleno       = {29},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{West:2011:ASS,
  author          = {Richard West and Gabriel Parmer},
  title           = {Application-specific service technologies for
                     commodity operating systems in real-time environments},
  journal         = j-TECS,
  volume          = {10},
  number          = {3},
  pages           = {30:1--30:??},
  month           = apr,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/1952522.1952523},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon May 2 10:07:27 MDT 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In order to eliminate the costs of proprietary systems
                     and special purpose hardware, many real-time and
                     embedded computing platforms are being built on
                     commodity operating systems and generic hardware.
                     Unfortunately, many such systems are ill-suited to the
                     low-latency and predictable timing requirements of
                     real-time applications. This article, therefore,
                     focuses on application-specific service technologies
                     for low-cost commodity operating systems and hardware,
                     so that real-time service guarantees can be met. We
                     describe contrasting methods to deploy first-class
                     services on commodity systems that are dispatched with
                     low latency and execute asynchronously according to
                     bounds on CPU, memory, and I/O device usage.
                     Specifically, we present a ``user-level sandboxing''
                     (ULS) mechanism that relies on hardware protection to
                     isolate application-specific services from the core
                     kernel.},
  acknowledgement = ack-nhfb,
  articleno       = {30},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Liu:2011:NBF,
  author =       "Xue Liu and Tarek Abdelzaher",
  title =        "Nonutilization bounds and feasible regions for
                 arbitrary fixed-priority policies",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1952522.1952524",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Prior research on schedulability bounds focused
                 primarily on bounding utilization as a means to meet
                 deadline constraints. Nontrivial bounds were found for
                 a handful of scheduling policies in which utilization
                 is directly related to the ability of the policy to
                 meet deadlines. Examples include rate-monotonic,
                 deadline-monotonic, and EDF scheduling. For most other
                 scheduling policies, however, utilization is not
                 correlated with schedulability. For example,
                 shortest-job-first can miss deadlines at an arbitrarily
                 low utilization. This raises the question of whether or
                 not some other nonutilization-based metric might be
                 more indicative of schedulability in those cases. This
                 article answers the above question positively by
                 extending the notion of schedulability bounds, in a
                 uniform manner, to arbitrary (fixed) priorities and
                 nonutilization metrics.",
  acknowledgement = ack-nhfb,
  articleno =    "31",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Nair:2011:EHB,
  author          = {Ajay Nair and Karthik Shankar and Roman Lysecky},
  title           = {Efficient hardware-based nonintrusive dynamic
                     application profiling},
  journal         = j-TECS,
  volume          = {10},
  number          = {3},
  pages           = {32:1--32:??},
  month           = apr,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/1952522.1952525},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon May 2 10:07:27 MDT 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Application profiling---the process of monitoring an
                     application to determine the frequency of execution
                     within specific regions---is an essential step within
                     the design process for many software and hardware
                     systems. Profiling is often a critical step within
                     hardware/software partitioning utilized to determine
                     the critical kernels of an application. In this
                     article, we present an innovative, nonintrusive dynamic
                     application profiler (DAProf) capable of profiling an
                     executing application by monitoring the application's
                     short backward branches, function calls, and function
                     returns. The resulting profile information provides an
                     accurate characterization of the frequently executed
                     loops within the application providing a breakdown of
                     loop executions versus loop iterations per execution.},
  acknowledgement = ack-nhfb,
  articleno       = {32},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Aaraj:2011:FDE,
  author          = {Najwa Aaraj and Anand Raghunathan and Niraj K. Jha},
  title           = {A framework for defending embedded systems against
                     software attacks},
  journal         = j-TECS,
  volume          = {10},
  number          = {3},
  pages           = {33:1--33:??},
  month           = apr,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/1952522.1952526},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon May 2 10:07:27 MDT 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The incidence of malicious code and software
                     vulnerability exploits on embedded platforms is
                     constantly on the rise. Yet, little effort is being
                     devoted to combating such threats to embedded systems.
                     Moreover, adapting security approaches designed for
                     general-purpose systems generally fails because of the
                     limited processing capabilities of their embedded
                     counterparts. In this work, we evaluate a malware and
                     software vulnerability exploit defense framework for
                     embedded systems. The proposed framework extends our
                     prior work, which defines two isolated execution
                     environments: a testing environment, wherein an
                     untrusted application is first tested using dynamic
                     binary instrumentation (DBI), and a real environment,
                     wherein a program is monitored at runtime using an
                     extracted behavioral model, along with a continuous
                     learning process.},
  acknowledgement = ack-nhfb,
  articleno       = {33},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Berendsen:2011:FSA,
  author          = {Jasper Berendsen and Biniam Gebremichael and Frits W.
                     Vaandrager and Miaomiao Zhang},
  title           = {Formal specification and analysis of {Zeroconf} using
                     {Uppaal}},
  journal         = j-TECS,
  volume          = {10},
  number          = {3},
  pages           = {34:1--34:32},
  month           = apr,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/1952522.1952527},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon May 2 10:07:27 MDT 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {The model checker Uppaal is used to formally model and
                     analyze parts of Zeroconf, a protocol for dynamic
                     configuration of IPv4 link-local addresses that has
                     been defined in RFC 3927 of the IETF. Our goal has been
                     to construct a model that (a) is easy to understand by
                     engineers, (b) comes as close as possible to the
                     informal text (for each transition in the model there
                     should be a corresponding piece of text in the RFC),
                     and (c) may serve as a basis for formal verification.
                     Our modeling efforts revealed several errors (or at
                     least ambiguities) in the RFC that no one else spotted
                     before.},
  acknowledgement = ack-nhfb,
  articleno       = {34},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Ykman-Couvreur:2011:FMM,
  author          = {Ch. Ykman-Couvreur and V. Nollet and F. Catthoor and
                     H. Corporaal},
  title           = {Fast multidimension multichoice knapsack heuristic for
                     {MP-SoC} runtime management},
  journal         = j-TECS,
  volume          = {10},
  number          = {3},
  pages           = {35:1--35:??},
  month           = apr,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/1952522.1952528},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon May 2 10:07:27 MDT 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Since the application complexity is growing and
                     applications can be dynamically activated, the major
                     challenge for heterogeneous multiprocessor platforms is
                     to select at runtime an energy-efficient mapping of
                     these applications. Taking into account that many
                     different possible implementations per application can
                     be available, and that the selection must meet the
                     application deadlines under the available platform
                     resources, this runtime optimization problem can be
                     modeled as a Multidimension Multichoice Knapsack
                     Problem (MMKP), which is known to be NP-hard. Not only
                     algorithms for an optimal solution, but also
                     state-of-the-art heuristics for real-time systems are
                     still too slow for runtime management of multiprocessor
                     platforms.},
  acknowledgement = ack-nhfb,
  articleno       = {35},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Ragel:2011:HHS,
  author =       "Roshan G. Ragel and Sri Parameswaran",
  title =        "A hybrid hardware--software technique to improve
                 reliability in embedded processors",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1952522.1952529",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Numerous methods have been described in research
                 literature with methods to improve reliability of
                 processors by the use of control-flow checking. High
                 performance and code-size penalties cripple the
                 proposed software approaches, while hardware approaches
                 are not scalable and are thus rarely implemented in
                 real embedded systems. In this article, we show that by
                 including control-flow checking as an issue to be
                 considered when designing an embedded processor, we are
                 able to reduce overheads considerably and still provide
                 a scalable solution to this problem. The technique
                 described in this article includes architectural
                 improvements to the processor and binary rewriting of
                 the application.",
  acknowledgement = ack-nhfb,
  articleno =    "36",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Huynh:2011:EAR,
  author          = {Johnny Huynh and Jos{\'e} Nelson Amaral and Paul
                     Berube and Sid-Ahmed-Ali Touati},
  title           = {Evaluating address register assignment and offset
                     assignment algorithms},
  journal         = j-TECS,
  volume          = {10},
  number          = {3},
  pages           = {37:1--37:??},
  month           = apr,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/1952522.1952530},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon May 2 10:07:27 MDT 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In digital signal processors (DSPs), variables are
                     accessed using $k$ address registers. The problem of
                     finding a memory layout, for a set of variables, that
                     minimizes the address-computation overhead is known as
                     the General Offset Assignment (GOA) problem. The most
                     common approach to this problem is to partition the set
                     of variables into $k$ partitions and to assign each
                     partition to an address register. Thus, effectively
                     decomposing the GOA problem into several Simple Offset
                     Assignment (SOA) problems. Many heuristic-based
                     algorithms are proposed in the literature to
                     approximate solutions to both the variable partitioning
                     and the SOA problems. However, the address-computation
                     overhead of the resulting memory layouts are not
                     accurately evaluated.},
  acknowledgement = ack-nhfb,
  articleno       = {37},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Diguet:2011:CLB,
  author =       "Jean-Philippe Diguet and Yvan Eustache and Guy
                 Gogniat",
  title =        "Closed-loop--based self-adaptive {Hardware\slash
                 Software-Embedded} systems: Design methodology and
                 smart {CAM} case study",
  journal =      j-TECS,
  volume =       "10",
  number =       "3",
  pages =        "38:1--38:??",
  month =        apr,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1952522.1952531",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon May 2 10:07:27 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents our methodology for implementing
                 self-adaptiveness within an OS-based and reconfigurable
                 embedded system according to objectives such as quality
                 of service, performance, or power consumption. We
                 detail our approach to separate application-specific
                 decisions and hardware\slash software-implementation
                 decisions at system level. The former are related to
                 the efficiency control of applications and based on the
                 knowledge of application engineers. The latter are
                 generic and address the choice between various hardware
                 and software implementations according to user
                 objectives. The decision management is implemented as
                 an adaptive closed-loop model. We describe how each
                 design step may be implemented and especially how we
                 solved the issue of stability.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Gamatie:2011:MDD,
  author          = {Abdoulaye Gamati{\'e} and S{\'e}bastien {Le Beux} and
                     {\'E}ric Piel and Rabie {Ben Atitallah} and Anne Etien
                     and Philippe Marquet and Jean-Luc Dekeyser},
  title           = {A Model-Driven Design Framework for Massively Parallel
                     Embedded Systems},
  journal         = j-TECS,
  volume          = {10},
  number          = {4},
  pages           = {39:1--39:??},
  month           = nov,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2043662.2043663},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Dec 19 15:49:06 MST 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Modern embedded systems integrate more and more
                     complex functionalities. At the same time, the
                     semiconductor technology advances enable to increase
                     the amount of hardware resources on a chip for the
                     execution. Massively parallel embedded systems
                     specifically deal with the optimized usage of such
                     hardware resources to efficiently execute their
                     functionalities. The design of these systems mainly
                     relies on the following challenging issues: first, how
                     to deal with the parallelism in order to increase the
                     performance; second, how to abstract their
                     implementation details in order to manage their
                     complexity; third, how to refine these abstract
                     representations in order to produce efficient
                     implementations.},
  acknowledgement = ack-nhfb,
  articleno       = {39},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Kim:2011:DPT,
  author          = {Seungkyun Kim and Kiwon Kwon and Chihun Kim and
                     Choonki Jang and Jaejin Lee and Sang Lyul Min},
  title           = {Demand Paging Techniques for Flash Memory Using
                     Compiler Post-Pass Optimizations},
  journal         = j-TECS,
  volume          = {10},
  number          = {4},
  pages           = {40:1--40:??},
  month           = nov,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2043662.2043664},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Dec 19 15:49:06 MST 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In this article, we propose an application-specific
                     demand paging mechanism for low-end embedded systems
                     that have flash memory as secondary storage. These
                     systems are not equipped with virtual memory. A small
                     memory space called an execution buffer is used to page
                     the code of an application. An application-specific
                     page manager manages the buffer. The page manager is
                     automatically generated by a compiler post-pass
                     optimizer and combined with the application image. The
                     post-pass optimizer analyzes the executable image and
                     transforms function call/return instructions into calls
                     to the page manager. As a result, each function in the
                     code can be loaded into the memory on demand at
                     runtime.},
  acknowledgement = ack-nhfb,
  articleno       = {40},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Dini:2011:LLA,
  author          = {Gianluca Dini and Ida M. Savino},
  title           = {{LARK}: a Lightweight Authenticated {ReKeying}
                     Scheme for Clustered Wireless Sensor Networks},
  journal         = j-TECS,
  volume          = {10},
  number          = {4},
  pages           = {41:1--41:??},
  month           = nov,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2043662.2043665},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Dec 19 15:49:06 MST 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Group communication has proven a powerful paradigm for
                     designing applications and services in Wireless Sensor
                     Networks (WSNs). Given the tight interaction between
                     WSNs and the physical world, a security infringement
                     may translate into a safety infringement. Therefore, in
                     order to fully exploit the group communication paradigm
                     we need to secure it. Traditionally, this requirement
                     has been formalized in terms of backward and forward
                     security and fulfilled by means of rekeying. In WSNs,
                     group rekeying becomes particularly a complex problem
                     because communication takes place over an easily
                     accessible wireless medium and because sensor nodes
                     have severe limitations in terms of computing, storage,
                     energy, and tamper-resistance capabilities for cost
                     reasons.},
  acknowledgement = ack-nhfb,
  articleno       = {41},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Schoeberl:2011:HAL,
  author          = {Martin Schoeberl and Stephan Korsholm and Tomas
                     Kalibera and Anders P. Ravn},
  title           = {A Hardware Abstraction Layer in {Java}},
  journal         = j-TECS,
  volume          = {10},
  number          = {4},
  pages           = {42:1--42:??},
  month           = nov,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2043662.2043666},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Dec 19 15:49:06 MST 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Embedded systems use specialized hardware devices to
                     interact with their environment, and since they have to
                     be dependable, it is attractive to use a modern,
                     type-safe programming language like Java to develop
                     programs for them. Standard Java, as a
                     platform-independent language, delegates access to
                     devices, direct memory access, and interrupt handling
                     to some underlying operating system or kernel, but in
                     the embedded systems domain resources are scarce and a
                     Java Virtual Machine (JVM) without an underlying
                     middleware is an attractive architecture. The
                     contribution of this article is a proposal for Java
                     packages with hardware objects and interrupt handlers
                     that interface to such a JVM.},
  acknowledgement = ack-nhfb,
  articleno       = {42},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Gilroy:2011:RHA,
  author          = {Michael Gilroy and James Irvine and Robert Atkinson},
  title           = {{RAID 6} Hardware Acceleration},
  journal         = j-TECS,
  volume          = {10},
  number          = {4},
  pages           = {43:1--43:??},
  month           = nov,
  year            = {2011},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2043662.2043667},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Mon Dec 19 15:49:06 MST 2011},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Inexpensive, reliable hard disk storage is
                     increasingly required in both businesses and the home.
                     As disk capacities increase and multiple drives are
                     combined in one system the probability of multiple disk
                     failures increases. Through the adoption of RAID 6 the
                     capability to recover from up to two simultaneous disk
                     failures becomes available. In this article, we present
                     three different RAID 6 implementations each tailored to
                     support different target applications and optimized to
                     reduce overall hardware resource utilization. We
                     present an optimal Reed-Solomon-based RAID 6
                     implementation for arrays of four disks. We also
                     present the smallest in terms of hardware resource
                     utilization as well having the highest throughput RAID
                     6 hardware solution for disk arrays of up to 15
                     drives.},
  acknowledgement = ack-nhfb,
  articleno       = {43},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Zhuang:2011:CST,
  author =       {Xiaotong Zhuang and Santosh Pande},
  title =        {Compiler-Supported Thread Management for Multithreaded
                 Network Processors},
  journal =      j-TECS,
  year =         {2011},
  month =        nov,
  volume =       {10},
  number =       {4},
  pages =        {44:1--44:??},
  articleno =    {44},
  doi =          {http://dx.doi.org/10.1145/2043662.2043668},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {Traditionally, runtime management involving CPU
                 sharing, real-time scheduling, etc., is provided by the
                 runtime environment (typically an operating system)
                 using hardware support such as timers and interrupts.
                 However, due to stringent performance requirements on
                 network processors, neither OS nor hardware mechanisms
                 are typically feasible/available. Mapping packet
                 processing tasks on network processors involves complex
                 trade-offs to maximize parallelism and pipelining. Due
                 to an increase in the size of the code store and
                 complexity of application requirements, network
                 processors are being programmed with heterogeneous
                 threads that may execute code belonging to different
                 tasks on a given micro-engine. Also, most network
                 applications are streaming applications that are
                 typically processed in a pipelined fashion.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Dec 19 15:49:06 MST 2011},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@article{Stuart:2011:RRN,
  author =       {Matthias Bo Stuart and Mikkel Bystrup Stensgaard and
                 Jens Spars{\o}},
  title =        {The {ReNoC} Reconfigurable {Network-on-Chip}:
                 Architecture, Configuration Algorithms, and
                 Evaluation},
  journal =      j-TECS,
  year =         {2011},
  month =        nov,
  volume =       {10},
  number =       {4},
  pages =        {45:1--45:??},
  articleno =    {45},
  doi =          {http://dx.doi.org/10.1145/2043662.2043669},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {This article presents a reconfigurable network-on-chip
                 architecture called ReNoC, which is intended for use in
                 general-purpose multiprocessor system-on-chip
                 platforms, and which enables application-specific
                 logical NoC topologies to be configured, thus providing
                 both efficiency and flexibility. The article presents
                 three novel algorithms that synthesize an
                 application-specific NoC topology, map it onto the
                 physical ReNoC architecture, and create deadlock-free,
                 application-specific routing algorithms. We apply our
                 algorithms to a mixture of real and synthetic
                 applications and target three different physical
                 architectures. Compared to a conventional NoC, ReNoC
                 reduces power consumption by up to 58\% on average.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Dec 19 15:49:06 MST 2011},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@article{Cucinotta:2011:RMA,
  author =       {Tommaso Cucinotta and Luca Abeni and Luigi Palopoli
                 and Giuseppe Lipari},
  title =        {A Robust Mechanism for Adaptive Scheduling of
                 Multimedia Applications},
  journal =      j-TECS,
  year =         {2011},
  month =        nov,
  volume =       {10},
  number =       {4},
  pages =        {46:1--46:??},
  articleno =    {46},
  doi =          {http://dx.doi.org/10.1145/2043662.2043670},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {We propose an adaptive scheduling technique to
                 schedule highly dynamic multimedia tasks on a CPU. We
                 use a combination of two techniques: the first one is a
                 feedback mechanism to track the resource requirements
                 of the tasks based on ``local'' observations. The
                 second one is a mechanism that operates with a
                 ``global'' visibility, reclaiming unused bandwidth. The
                 combination proves very effective: resource reclaiming
                 increases the robustness of the feedback, while the
                 identification of the correct bandwidth made by the
                 feedback increases the effectiveness of the
                 reclamation. We offer both theoretical results and an
                 extensive experimental validation of the approach.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Dec 19 15:49:06 MST 2011},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

%%% The last author's surname is the compound ``Dupont de Dinechin''.
%%% It is braced so that BibTeX does not take ``de'' as a von part and
%%% file the name under ``Dinechin'' with ``Dupont'' as a given name.
@Article{Touati:2011:ESR,
  author =       "Sid-Ahmed-Ali Touati and Frederic Brault and Karine
                 Deschinkel and Beno{\^\i}t {Dupont de Dinechin}",
  title =        "Efficient Spilling Reduction for Software Pipelined
                 Loops in Presence of Multiple Register Types in
                 Embedded {VLIW} Processors",
  journal =      j-TECS,
  volume =       "10",
  number =       "4",
  pages =        "47:1--47:??",
  month =        nov,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2043662.2043671",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Dec 19 15:49:06 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Integrating register allocation and software
                 pipelining of loops is an active research area. We
                 focus on techniques that precondition the dependence
                 graph before software pipelining in order to ensure
                 that no register spill instructions are inserted by the
                 register allocator in the software pipelined loop. If
                 spilling is not necessary for the input code,
                 preconditioning techniques insert dependence arcs so
                 that the maximum register pressure MAXLIVE achieved by
                 any loop schedule is below the number of available
                 registers, without hurting the initiation interval if
                 possible. When a solution exists, a spill-free software
                 pipeline is guaranteed to exist. Existing
                 preconditioning techniques consider one register type
                 (register class) at a time [Deschinkel and Touati
                 2008].",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Zhou:2011:ARA,
  author =       {Gang Zhou and Qiang Li and Jingyuan Li and Yafeng Wu
                 and Shan Lin and Jian Lu and Chieh-Yih Wan and Mark
                 D. Yarvis and John A. Stankovic},
  title =        {Adaptive and Radio-Agnostic {QoS} for Body Sensor
                 Networks},
  journal =      j-TECS,
  year =         {2011},
  month =        nov,
  volume =       {10},
  number =       {4},
  pages =        {48:1--48:??},
  articleno =    {48},
  doi =          {http://dx.doi.org/10.1145/2043662.2043672},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {As wireless devices and sensors are increasingly
                 deployed on people, researchers have begun to focus on
                 wireless body-area networks. Applications of wireless
                 body sensor networks include healthcare, entertainment,
                 and personal assistance, in which sensors collect
                 physiological and activity data from people and their
                 environments. In these body sensor networks, quality of
                 service is needed to provide reliable data
                 communication over prioritized data streams. This
                 article proposes BodyQoS, the first running QoS system
                 demonstrated on an emulated body sensor network.
                 BodyQoS adopts an asymmetric architecture, in which
                 most processing is done on a resource-rich aggregator,
                 minimizing the load on resource-limited sensor nodes.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Dec 19 15:49:06 MST 2011},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Wandeler:2012:UGS,
  author =       "Ernesto Wandeler and Alexander Maxiaguine and Lothar
                 Thiele",
  title =        "On the use of greedy shapers in real-time embedded
                 systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2146417.2146418",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Apr 2 17:42:24 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Traffic shaping is a well-known technique in the area
                 of networking and is proven to reduce global buffer
                 requirements and end-to-end delays in networked
                 systems. Due to these properties, shapers also play an
                 increasingly important role in the design of
                 multiprocessor embedded systems that exhibit a
                 considerable amount of on-chip traffic. Despite the
                 growing importance of traffic shaping in this area, no
                 methods exist for analyzing shapers in distributed
                 embedded systems and for incorporating them into a
                 system-level performance analysis. Until now it was not
                 possible to determine the effect of shapers on
                 end-to-end delay guarantees or buffer requirements in
                 such systems. In this work, we present a method for
                 analyzing greedy shapers, and we embed this analysis
                 method into a well-established modular performance
                 analysis framework for real-time embedded systems. The
                 presented approach enables system-level performance
                 analysis of complete systems with greedy shapers, and
                 we prove its applicability by analyzing three case
                 study systems.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Hamers:2012:EMS,
  author =       {Juan Hamers and Lieven Eeckhout},
  title =        {Exploiting media stream similarity for
                 energy-efficient decoding and resource prediction},
  journal =      j-TECS,
  year =         {2012},
  month =        mar,
  volume =       {11},
  number =       {1},
  pages =        {2:1--2:??},
  articleno =    {2},
  doi =          {http://dx.doi.org/10.1145/2146417.2146419},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {This article introduces a novel approach to
                 energy-efficient media stream decoding that is based on
                 the notion of media stream similarity. The key idea is
                 that platform-independent scenarios with similar
                 decoding complexity can be identified within and across
                 media streams. A device that decodes a media stream
                 annotated with scenario information can then adjust its
                 processor clock frequency and voltage level based on
                 these scenarios for lower energy consumption. Our
                 evaluation, done using the H.264 AVC decoder and 12
                 reference video streams, shows an average energy
                 reduction of 44\% while missing less than 0.2\% of the
                 frame deadlines using scenario-driven video decoding.
                 An additional application of scenario-based media
                 stream annotation is to predict required resources
                 (compute power and energy) for consuming a given
                 service on a given device. Resource prediction is
                 extremely useful in a client-server setup in which the
                 client requests a media service from the server or
                 content provider. The content provider (in cooperation
                 with the client) can then determine what service
                 quality to deliver, given the client's available
                 resources. Scenario-aware resource prediction can
                 predict (compute power and energy) consumption with
                 errors less than 4\% (and an overall average 1.4\%
                 error).},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Apr 2 17:42:24 MDT 2012},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@article{Zhong:2012:WSN,
  author =       {Ziguo Zhong and Tian He},
  title =        {Wireless sensor node localization by multisequence
                 processing},
  journal =      j-TECS,
  year =         {2012},
  month =        mar,
  volume =       {11},
  number =       {1},
  pages =        {3:1--3:??},
  articleno =    {3},
  doi =          {http://dx.doi.org/10.1145/2146417.2146420},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {Wireless Sensor Networks have been proposed for use in
                 many location-dependent applications. Most of these
                 need to identify the locations of sensor nodes, a
                 challenging task because of severe constraints on cost,
                 energy and effective range of sensor devices. To
                 overcome limitations in existing solutions, we present
                 a Multi-Sequence Positioning (MSP) method for
                 large-scale stationary sensor node localization in
                 outdoor environments. The novel idea behind MSP is to
                 reconstruct and estimate two-dimensional location
                 information for each sensor node by processing multiple
                 one-dimensional node sequences, easily obtained through
                 loosely guided event distribution. Starting from a
                 basic MSP design, we propose four optimizations that
                 work together to increase localization accuracy. We
                 address several interesting issues such as incomplete
                 (partial) node sequences and sequence flip, found in
                 the Mirage test-bed we built. We have evaluated the MSP
                 system through theoretical analysis, extensive
                 simulation as well as two physical systems (an indoor
                 version with 46 MICAz motes and an outdoor version with
                 20 MICAz motes). Evaluation demonstrates that MSP can
                 achieve an accuracy within one foot, requiring neither
                 additional costly hardware on sensor nodes nor precise
                 event distribution. In fact, it provides a nice
                 tradeoff between physical cost (anchors) and soft cost
                 (events) while maintaining localization accuracy.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Apr 2 17:42:24 MDT 2012},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@article{Peng:2012:BHA,
  author =       {Chunyi Peng and Guobin Shen and Yongguang Zhang},
  title =        {{BeepBeep}: a high-accuracy acoustic-based system for
                 ranging and localization using {COTS} devices},
  journal =      j-TECS,
  year =         {2012},
  month =        mar,
  volume =       {11},
  number =       {1},
  pages =        {4:1--4:??},
  articleno =    {4},
  doi =          {http://dx.doi.org/10.1145/2146417.2146421},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {We present the design and implementation of BeepBeep,
                 a high-accuracy acoustic-based system for ranging and
                 localization. It is a pure software-based solution and
                 uses the most basic set of commodity hardware --- a
                 speaker, a microphone, and some form of interdevice
                 communication. The ranging scheme works without any
                 infrastructure and is applicable to sensor platforms
                 and commercial-off-the-shelf mobile devices. It
                 achieves high accuracy through three techniques:
                 two-way sensing, self-recording, and sample counting.
                 We further devise a scalable and fast localization
                 scheme. Our experiments show that up to one-centimeter
                 ranging accuracy and three-centimeter localization
                 accuracy can be achieved.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Apr 2 17:42:24 MDT 2012},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Kumar:2012:CMA,
  author =       "T. S. Rajesh Kumar and R. Govindarajan and C. P.
                 Ravikumar",
  title =        "On-chip memory architecture exploration framework for
                 {DSP} processor-based embedded system on chip",
  journal =      j-TECS,
  volume =       "11",
  number =       "1",
  pages =        "5:1--5:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2146417.2146422",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Apr 2 17:42:24 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Today's SoCs are complex designs with multiple
                 embedded processors, memory subsystems, and application
                 specific peripherals. The memory architecture of
                 embedded SoCs strongly influences the power and
                 performance of the entire system. Further, the memory
                 subsystem constitutes a major part (typically up to
                 70\%) of the silicon area for the current day SoC. In
                 this article, we address the on-chip memory
                 architecture exploration for DSP processors which are
                 organized as multiple memory banks, where banks can be
                 single/dual ported with non-uniform bank sizes. In this
                 paper we propose two different methods for physical
                 memory architecture exploration and identify the
                 strengths and applicability of these methods in a
                 systematic way. Both methods address the memory
                 architecture exploration for a given target application
                 by considering the application's data access
                 characteristics and generate a set of Pareto-optimal
                 design points that are interesting from a power,
                 performance and VLSI area perspective. To the best of
                 our knowledge, this is the first comprehensive work on
                 memory space exploration at physical memory level that
                 integrates data layout and memory exploration to
                 address the system objectives from both hardware design
                 and application software development perspective.
                 Further we propose an automatic framework that explores
                 the design space identifying 100's of Pareto-optimal
                 design points within a few hours of running on a
                 standard desktop configuration.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Pande:2012:PDP,
  author =       {Amit Pande and Joseph Zambreno},
  title =        {{Poly-DWT}: {Polymorphic} wavelet hardware support for
                 dynamic image compression},
  journal =      j-TECS,
  year =         {2012},
  month =        mar,
  volume =       {11},
  number =       {1},
  pages =        {6:1--6:??},
  articleno =    {6},
  doi =          {http://dx.doi.org/10.1145/2146417.2146423},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {Many modern computing applications have been enabled
                 through the use of real-time multimedia processing.
                 While several hardware architectures have been proposed
                 in the research literature to support such primitives,
                 these fail to address applications whose performance
                 and resource requirements have a dynamic aspect.
                 Embedded multimedia systems typically need a power and
                 computation efficient design in addition to good
                 compression performance. In this article, we introduce
                 a Polymorphic Wavelet Architecture (Poly-DWT) as a
                 crucial building block towards the development of
                 embedded systems to address such challenges. We
                 illustrate how our Poly-DWT architecture can
                 potentially make dynamic resource allocation decisions,
                 such as the internal bit representation and the
                 processing kernel, according to the application
                 requirements. We introduce a filter switching
                 architecture that allows for dynamic switching between
                 5/3 and 9/7 wavelet filters and leads to a more power
                 efficient design. Further, a multiplier-free design
                 with a low adder requirement demonstrates the potential
                 of Poly-DWT for embedded systems. Through an FPGA
                 prototype, we perform a quantitative analysis of our
                 Poly-DWT architecture, and compare our filter to
                 existing approaches to illustrate the area and
                 performance benefits inherent in our approach.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Apr 2 17:42:24 MDT 2012},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@article{Seo:2012:RGV,
  author =       {Suk-Hyun Seo and Jin-Ho Kim and Sung-Ho Hwang and Key
                 Ho Kwon and Jae Wook Jeon},
  title =        {A reliable gateway for in-vehicle networks based on
                 {LIN}, {CAN}, and {FlexRay}},
  journal =      j-TECS,
  year =         {2012},
  month =        mar,
  volume =       {11},
  number =       {1},
  pages =        {7:1--7:??},
  articleno =    {7},
  doi =          {http://dx.doi.org/10.1145/2146417.2146424},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {This article describes a reliable gateway for
                 in-vehicle networks. Such networks include local
                 interconnect networks, controller area networks, and
                 FlexRay. There is some latency when transferring a
                 message from one node (source) to another node
                 (destination). A high probability of error exists due
                 to different protocol specifications such as baud-rate,
                 and message frame format. Therefore, deploying a
                 reliable gateway is a challenge to the automotive
                 industry. We propose a reliable gateway based on the
                 OSEK/VDX components for in-vehicle networks. We also
                 examine the gateway system developed, and then we
                 evaluate the performance of our proposed system.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Apr 2 17:42:24 MDT 2012},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@article{Huang:2012:EFP,
  author =       {Kai Huang and Wolfgang Haid and Iuliana Bacivarov and
                 Matthias Keller and Lothar Thiele},
  title =        {Embedding formal performance analysis into the design
                 cycle of {MPSoCs} for real-time streaming
                 applications},
  journal =      j-TECS,
  year =         {2012},
  month =        mar,
  volume =       {11},
  number =       {1},
  pages =        {8:1--8:??},
  articleno =    {8},
  doi =          {http://dx.doi.org/10.1145/2146417.2146425},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {Modern real-time streaming applications are
                 increasingly implemented on multiprocessor
                 systems-on-chip (MPSoC). The implementation, as well as
                 the verification of real-time applications executing on
                 MPSoCs, are difficult tasks, however. A major challenge
                 is the performance analysis of MPSoCs, which is
                 required for early design space exploration and final
                 system verification. Simulation-based methods are not
                 well-suited for this purpose, due to long runtimes and
                 non-exhaustive corner-case coverage. To overcome these
                 limitations, formal performance analysis methods that
                 provide guarantees for meeting real-time constraints
                 have been developed. Embedding formal performance
                 analysis into the MPSoC design cycle requires the
                 generation of a faithful analysis model and its
                 calibration with the system-specific parameters. In
                 this article, a design flow that automates these steps
                 is presented. In particular, we integrate modular
                 performance analysis (MPA) into the distributed
                 operation layer (DOL) MPSoC programming environment.
                 The result is an MPSoC software design flow that allows
                 for automatically generating the system implementation,
                 together with an analysis model for system
                 verification.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Apr 2 17:42:24 MDT 2012},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@article{Chang:2012:AFS,
  author =       {Yuan-Hao Chang and Po-Liang Wu and Tei-Wei Kuo and
                 Shih-Hao Hung},
  title =        {An adaptive file-system-oriented {FTL} mechanism for
                 flash-memory storage systems},
  journal =      j-TECS,
  year =         {2012},
  month =        mar,
  volume =       {11},
  number =       {1},
  pages =        {9:1--9:??},
  articleno =    {9},
  doi =          {http://dx.doi.org/10.1145/2146417.2146426},
  issn =         {1539-9087 (print), 1558-3465 (electronic)},
  issn-l =       {1539-9087},
  coden =        {????},
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  journal-url =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {As flash memory becomes popular over various
                 platforms, there is a strong demand regarding the
                 performance degradation problem, due to the special
                 characteristics of flash memory. This research proposes
                 the design of a file-system-oriented flash translation
                 layer, in which a filter mechanism is designed to
                 separate the access requests of file-system metadata
                 and file contents for better performance. A recovery
                 scheme is then proposed for maintaining the integrity
                 of a file system. The proposed flash translation layer
                 is implemented as a Linux device driver and evaluated
                 with respect to ext2 and ext3 file systems. Experiments
                 were also done over NTFS by a series of realistic
                 traces. The experimental results show significant
                 performance improvement over ext2, ext3, and NTFS file
                 systems with limited system overheads.},
  acknowledgement = ack-nhfb,
  bibdate =      {Mon Apr 2 17:42:24 MDT 2012},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Li:2012:SRS,
  author =       "Chunxiao Li and Niraj K. Jha and Anand Raghunathan",
  title =        "Secure reconfiguration of software-defined radio",
  journal =      j-TECS,
  volume =       "11",
  number =       "1",
  pages =        "10:1--10:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2146417.2146427",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Apr 2 17:42:24 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Software-defined radio (SDR) implements a radio system in
                 software that executes on a programmable processor. The
                 components of SDR, such as the filters, amplifiers, and
                 modulators, can be easily reconfigured to adapt to the
                 operating environment and user preferences. However, the
                 flexibility of radio reconfiguration brings along the serious
                 security concern of malicious modification of software in the
                 SDR system, leading to radio malfunction and interference with
                 other users' communications. Both the SDR device and the
                 network need to be protected from such malicious radio
                 reconfiguration. In this article, a new architecture targeted
                 at protecting SDR devices from malicious reconfiguration is
                 proposed. The architecture is based on robust separation of
                 the radio operation environment and user application
                 environment, through the use of virtualization. A new radio
                 middleware layer is designed to securely intercept all
                 attempts to reconfigure the radio, and a security policy
                 monitor checks the target configuration against security
                 policies that represent the interests of various parties. Even
                 if the operating system in the user application environment is
                 compromised, the proposed architecture can ensure secure
                 reconfiguration in the radio operation environment. We have
                 prototyped the proposed secure SDR architecture using VMware
                 and the GNU Radio toolkit and demonstrate that overheads
                 incurred by the architecture are small and tolerable.
                 Therefore, we believe that the proposed solution could be
                 applied to address secure SDR reconfiguration in both
                 general-purpose and embedded computing systems.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Berekovic:2012:ISS,
  author =       "Mladen Berekovic and Samarjit Chakraborty and Petru
                 Eles and Andy D. Pimentel",
  title =        "Introduction to the {Special Section on
                 ESTIMedia'08}",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180891",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Zhu:2012:PAR,
  author =       "Jun Zhu and Ingo Sander and Axel Jantsch",
  title =        "Performance Analysis of Reconfigurations in
                 Adaptive Real-Time Streaming Applications",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180888",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We propose a performance analysis framework for
                 adaptive real-time synchronous data flow streaming
                 applications on runtime reconfigurable FPGAs. As the
                 main contribution, we present a constraint based
                 approach to capture both streaming application
                 execution semantics and the varying design concerns
                 during reconfigurations. With our event models
                 constructed as cumulative functions on data streams, we
                 exploit a novel compile-time analysis framework based
                 on iterative timing phases. Finally, we implement our
                 framework on a public domain constraint solver, and
                 illustrate its capabilities in the analysis of design
                 trade-offs due to reconfigurations with experiments.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Hsieh:2012:PBP,
  author =       "Kun-Yuan Hsieh and Chi-Hua Lai and Shang-Hong Lai and
                 Jenq Kuen Lee",
  title =        "Parallelization of Belief Propagation on {Cell}
                 Processors for Stereo Vision",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180889",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Markov random field models provide a robust
                 formulation for the stereo vision problem of inferring
                 three-dimensional scene geometry from two images taken
                 from different viewpoints. One of the most advanced
                 algorithms for solving the associated energy
                 minimization problem in the formulation is belief
                 propagation (BP). Although BP provides very accurate
                 results in solving stereo vision problems, the high
                 computational cost of the algorithm hinders it from
                 real-time applications. In recent years, multicore
                 architectures have been widely adopted in various
                 industrial application domains. The high computing
                 power of multicore processors provides new
                 opportunities to implement stereo vision algorithms.
                 This article examines and extracts the parallelisms in
                 the BP method for stereo vision on multicore
                 processors. This article shows that parallelism of the
                 algorithm can be efficiently utilized on multicore
                 processors. The results show that parallelization on
                 multicore processors provides a speedup for the BP
                 algorithm of almost 15 times compared to the
                 single-processor implementation on the PPE of the Cell
                 BE. The experimental results also indicate that a frame
                 rate of 6.5 frames/second is possible when implementing
                 the parallelized BP algorithm on the multicore
                 processor of Cell BE with one PPE and six SPEs.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Terechko:2012:BPS,
  author =       "Andrei Terechko and Jan Hoogerbrugge and Ghiath Alkadi
                 and Surendra Guntur and Anirban Lahiri and Marc
                 Duranton and Clemens W{\"u}st and Phillip Christie and
                 Axel Nackaerts and Aatish Kumar",
  title =        "Balancing Programmability and Silicon Efficiency of
                 Heterogeneous Multicore Architectures",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180890",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multicore architectures provide scalable performance
                 with a lower hardware design effort than single core
                 processors. Our article presents a design methodology
                 and an embedded multicore architecture, focusing on
                 reducing the software design complexity and boosting
                 the performance density. First, we analyze
                 characteristics of the Task-Level Parallelism in modern
                 multimedia workloads. These characteristics are used to
                 formulate requirements for the programming model. Then
                 we translate the programming model requirements to an
                 architecture specification, including a novel
                 low-complexity implementation of cache coherence and a
                 hardware synchronization unit. Our evaluation
                 demonstrates that the novel coherence mechanism
                 substantially simplifies hardware design, while
                 reducing the performance by less than 18\% relative to
                 a complex snooping technique. Compared to a single
                 processor core, the multicores have already proven to
                 be more area- and energy-efficient. However, the
                 multicore architectures in embedded systems still
                 compete with highly efficient function-specific
                 hardware accelerators. In this article we identify five
                 architectural methods to boost performance density of
                 multicores; microarchitectural downscaling, asymmetric
                 multicore architectures, multithreading, generic
                 accelerators, and conjoining. Then, we present a novel
                 methodology to explore multicore design spaces,
                 including the architectural methods improving the
                 performance density. The methodology is based on a
                 complex formula computing performances of heterogeneous
                 multicore systems. Using this design space exploration
                 methodology for HD and QuadHD H.264 video decoding, we
                 estimate that the required areas of multicores in CMOS
                 45 nm are 2.5 mm$^2$ and 8.6 mm$^2$, respectively.
                 These results suggest that heterogeneous multicores are
                 cost-effective for embedded applications and can
                 provide a good programmability support.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Khajeh:2012:EAA,
  author =       "Amin Khajeh and Minyoung Kim and Nikil Dutt and Ahmed
                 M. Eltawil and Fadi J. Kurdahi",
  title =        "Error-Aware Algorithm\slash Architecture Coexploration
                 for Video Over Wireless Applications",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180892",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, we propose a cross-layer
                 algorithm/architecture coexploration for wireless
                 multimedia systems to coordinate interactions among
                 sublayer optimizers for improvements in
                 energy/QoS/reliability. By exploiting the inherent
                 redundancy in wireless multimedia systems, we generate
                 an expanded design space over traditional
                 layer-specific approaches. Specifically, we control the
                 error resilient encoder at the application layer to
                 provide awareness of architectural exploration at the
                 physical layer allowing new design points with lower
                 power consumption via aggressive voltage scaling. While
                 trying to reduce energy consumption, the fault tolerant
                 technique compensates the effect of the hardware and
                 network errors due to aggressive voltage scaling and
                 lossy transmission, respectively. Our experiments on
                 H.263 video over a WCDMA communication system
                 demonstrate that coexploration enlarges the feasible
                 design space, which results in significant power
                 savings of more than 20\% in the WCDMA modem.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Salamy:2012:SOT,
  author =       "Hassan Salamy and J. Ramanujam",
  title =        "Storage Optimization through Offset Assignment with
                 Variable Coalescing",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180893",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Most modern digital signal processors (DSPs) provide
                 multiple address registers and a dedicated address
                 generation unit (AGU) which performs address generation
                 in parallel to instruction execution. There is no
                 address computation overhead if the next address is
                 within the auto-modify range. A careful placement of
                 variables in memory is utilized to decrease the number
                 of address arithmetic instructions and thus to generate
                 compact and efficient code. The simple offset
                 assignment (SOA) problem concerns the layout of
                 variables for machines with one address register and
                 the general offset assignment (GOA) deals with multiple
                 address registers. Both these problems assume that each
                 variable needs to be allocated for the entire duration
                 of a program. Both SOA and GOA are NP-complete. In this
                 article, we present effective heuristics for the simple
                 and the general offset assignment problems with
                 variable coalescing where two or more non-interfering
                 variables can be mapped into the same memory location.
                 Results on several benchmarks show the significant
                 improvement of our proposed heuristics compared to
                 other heuristics in the literature.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Falk:2012:ISS,
  author =       "Heiko Falk and Peter Marwedel",
  title =        "Introduction to the {Special Section on SCOPES'09}",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180894",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Kim:2012:FLF,
  author =       "Jaegeuk Kim and Hyotaek Shim and Seon-Yeong Park and
                 Seungryoul Maeng and Jin-Soo Kim",
  title =        "{FlashLight}: a Lightweight Flash File System for
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180895",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A very promising approach for using NAND flash memory
                 as a storage medium is a flash file system. In order to
                 design a higher-performance flash file system, two
                 issues should be considered carefully. One issue is the
                 design of an efficient index structure that contains
                 the locations of both files and data in the flash
                 memory. For large-capacity storage, the index structure
                 must be stored in the flash memory to realize low
                 memory consumption; however, this may degrade the
                 system performance. The other issue is the design of a
                 novel garbage collection (GC) scheme that reclaims
                 obsolete pages. This scheme can induce considerable
                 additional read and write operations while identifying
                 and migrating valid pages. In this article, we present
                 a novel flash file system that has the following
                 features: (i) a lightweight index structure that
                 introduces the hybrid indexing scheme and intra-inode
                 index logging, and (ii) an efficient GC scheme that
                 adopts a dirty list with an on-demand GC approach as
                 well as fine-grained data separation and erase-unit
                 data allocation. We implemented FlashLight in a Linux
                 OS with kernel version 2.6.21 on an embedded device.
                 The experimental results obtained using several
                 benchmark programs confirm that FlashLight improves the
                 performance by up to 27.4\% over UBIFS by alleviating
                 index management and GC overheads by up to 33.8\%.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Eriksson:2012:ICG,
  author =       "Mattias Eriksson and Christoph Kessler",
  title =        "Integrated Code Generation for Loops",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "19:1--19:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180896",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Code generation in a compiler is commonly divided into
                 several phases: instruction selection, scheduling,
                 register allocation, spill code generation, and, in the
                 case of clustered architectures, cluster assignment.
                 These phases are interdependent; for instance, a
                 decision in the instruction selection phase affects how
                 an operation can be scheduled. We examine the effect of
                 this separation of phases on the quality of the
                 generated code. To study this we have formulated
                 optimal methods for code generation with integer linear
                 programming; first for acyclic code and then we extend
                 this method to modulo scheduling of loops. In our
                 experiments we compare optimal modulo scheduling, where
                 all phases are integrated, to modulo scheduling, where
                 instruction selection and cluster assignment are done
                 in a separate phase. The results show that, for an
                 architecture with two clusters, the integrated method
                 finds a better solution than the nonintegrated method
                 for 27\% of the instances.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Murray:2012:ASL,
  author =       "Alastair Murray and Bj{\"o}rn Franke",
  title =        "Adaptive Source-Level Data Assignment to Dual Memory
                 Banks",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "20:1--20:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180897",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dual memory banks provide extra memory bandwidth to
                 DSP applications and enable simultaneous access to two
                 operands if the data is partitioned appropriately.
                 Fully automated and compiler integrated approaches to
                 data partitioning and memory bank assignment have,
                 however, found little acceptance by DSP software
                 developers. In this article we present a novel
                 source-level approach that is more programmer friendly.
                 Our scheme is based on soft graph coloring and highly
                 adaptive heuristics generated by genetic programming.
                 We have evaluated our scheme on an Analog Devices
                 TigerSHARC TS-101 DSP and achieved speedups of up to
                 57\% on 13 UTDSP benchmarks.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Boissinot:2012:SPR,
  author =       "Benoit Boissinot and Philip Brisk and Alain Darte and
                 Fabrice Rastello",
  title =        "{SSI} Properties Revisited",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "21:1--21:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180898",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The static single information (SSI) form is an
                 extension of the static single assignment (SSA) form, a
                 well-established compiler intermediate representation
                 that has been successfully used for numerous compiler
                 analysis and optimizations. Several interesting results
                 have also been shown for SSI form concerning liveness
                 analysis and the representation of live-ranges of
                 variables, which could make SSI form appealing for
                 just-in-time compilation. Unfortunately, we have
                 uncovered several mistakes in the previous literature
                 on SSI form, which, admittedly, is already quite
                 sparse. This article corrects the mistakes that are
                 most germane to SSI form. We first explain why the two
                 definitions of SSI form proposed in past literature,
                 first by C. S. Ananian, then by J. Singer, are not
                 equivalent. Our main result is then to prove that basic
                 blocks, and thus program points, can be totally ordered
                 so that live-ranges of variables correspond to
                 intervals on a line, a result that holds for both
                 variants of SSI form. In other words, in SSI form, the
                 intersection graph defined by live-ranges is an
                 interval graph, a stronger structural property than for
                 SSA form for which the intersection graph of
                 live-ranges is chordal. Finally, we show how this
                 structure of live-ranges can be used to simplify
                 liveness analysis.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Franke:2012:SPM,
  author =       "Bj{\"o}rn Franke",
  title =        "Statistical Performance Modeling in Functional
                 Instruction Set Simulators",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "22:1--22:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180899",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Despite the recent progress in improving the speed of
                 instruction-accurate simulators cycle-accurate
                 simulation is still prohibitively slow for all but the
                 most basic programs. In this article we present a
                 statistical machine learning approach to performance
                 estimation in fast, instruction accurate simulators and
                 evaluate our methodology comprehensively against three
                 popular embedded RISC processors and about 300 embedded
                 applications. We show that our methodology is capable
                 of providing accurate performance estimations with an
                 average error of less than 3.9\% while, on average,
                 operating $\approx 14.5$ times faster than
                 cycle-accurate simulation.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Chandraiah:2012:CAR,
  author =       "Pramod Chandraiah and Rainer D{\"o}mer",
  title =        "Computer-Aided Recoding to Create Structured and
                 Analyzable System Models",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "23:1--23:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180900",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In embedded system design, the quality of the input
                 model has a direct bearing on the effectiveness of the
                 system exploration and synthesis tools. Given a
                 well-written system model, tools today are effective in
                 generating working implementations. However, readily
                 available C reference code is not conducive for
                 immediate system synthesis as it lacks needed features
                 for automatic analysis and synthesis. Among others, the
                 lack of proper structure and the presence of
                 intractable pointers in the reference code are factors
                 that seriously hamper the effectiveness of system
                 design tools. To overcome these deficiencies, we aim to
                 automate the conversion of flat C code into a
                 well-structured system model by applying automated
                 source code transformations. We present a set of
                 computer-aided recoding operations that enable the
                 system designer to mitigate pointer problems and
                 quickly create the necessary structural hierarchy so
                 that the design model becomes easily analyzable and
                 synthesizable. Utilizing the designer's knowledge, our
                 interactive recoding transformations aid the designer
                 in efficiently creating well-structured system models
                 for rapid design space exploration and successful
                 synthesis. Our estimated and measured experimental
                 results show significant productivity gains through a
                 substantial reduction of the model creation time.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

%%% Article 24 of the online-only supplemental issue, Vol. 11S(1); see
%%% the abstract of entry Staff:2012:APA.
@Article{Dubach:2012:EPE,
  author =       "Christophe Dubach and Timothy M. Jones and Michael F.
                 P. O'Boyle",
  title =        "Exploring and Predicting the Effects of
                 Microarchitectural Parameters and Compiler
                 Optimizations on Performance and Energy",
  journal =      j-TECS,
  volume =       "11S",
  number =       "1",
  pages =        "24:1--24:??",
  month =        jun,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2180887.2180901",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jun 7 16:18:52 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Embedded processor performance is dependent on both
                 the underlying architecture and the compiler
                 optimizations applied. However, designing both
                 simultaneously is extremely difficult to achieve due to
                 the time constraints designers must work under.
                 Therefore, current methodology involves designing
                 compiler and architecture in isolation, leading to
                 suboptimal performance of the final product. This
                 article develops a novel approach to this codesign
                 space problem. For our specific design space, we
                 demonstrate that we can automatically predict the
                 performance that an optimizing compiler would achieve
                 without actually tuning it for any of the
                 microarchitecture configurations considered. Once
                 trained, a single run of the program compiled with the
                 standard optimization setting is enough to make a
                 prediction on the new microarchitecture with just a
                 3.2\% error rate on average. This allows the designer
                 to accurately choose an architectural configuration
                 with knowledge of how an optimizing compiler will
                 perform on it. We use this to find the best optimizing
                 compiler/architectural configuration in our codesign
                 space and demonstrate that it achieves an average 19\%
                 performance improvement and energy savings of 16\%
                 compared to the baseline, nearly doubling the
                 energy-efficiency measured as the energy-delay-squared
                 product (EDD).",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Staff:2012:APA,
  author = {{TECS Staff}},
  title = {Abstracts of Papers to appear in {Special Supplemental Issue of
    TECS (v11, iSupplemental1)}},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {25:1--25:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220337},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In order to speed up the publication process, we have begun
    to publish supplemental online-only issues. The following abstracts
    describe the articles in the first such issue, Vol. 11S(1). These
    articles are available in the Digital Library.},
  acknowledgement = ack-nhfb,
  articleno = {25},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Lee:2012:PPI,
  author = {Jongeun Lee and Aviral Shrivastava},
  title = {{PICA}: {Processor Idle Cycle Aggregation} for Energy-Efficient
    Embedded Systems},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {26:1--26:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220338},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Processor Idle Cycle Aggregation (PICA) is a promising
    approach for low-power execution of processors, in which small memory
    stalls are aggregated to create large ones, enabling profitable switch
    of the processor into low-power mode. We extend the previous approach
    in three dimensions. First we develop static analysis for the PICA
    technique and present optimal parameters for five common types of
    loops based on steady-state analysis. Second, to remedy the weakness
    of software-only control in varying environment, we enhance PICA with
    minimal hardware extension that ensures correct execution for any
    loops and parameters, thus greatly facilitating exploration-based
    parameter tuning. Third, we demonstrate that our PICA technique can be
    applied to certain types of nested loops with variable bounds, thus
    enhancing the applicability of PICA. We validate our analytical model
    against simulation-based optimization and also show, through our
    experiments on embedded application benchmarks, that our technique can
    be applied to a wide range of loops with average 20\% energy
    reductions, compared to executions without PICA.},
  acknowledgement = ack-nhfb,
  articleno = {26},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{McIntire:2012:EES,
  author = {Dustin McIntire and Thanos Stathopoulos and Sasank Reddy and
    Thomas Schmidt and William J. Kaiser},
  title = {Energy-Efficient Sensing with the {Low Power, Energy Aware
    Processing} ({LEAP}) Architecture},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {27:1--27:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220339},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {A broad range of embedded networked sensing (ENS)
    applications have appeared for large-scale systems, introducing new
    requirements leading to new embedded architectures, associated
    algorithms, and supporting software systems. These new requirements
    include the need for diverse and complex sensor systems that present
    demands for energy and computational resources, as well as for
    broadband communication. To satisfy application demands while
    maintaining critical support for low-energy operation, a new
    multiprocessor node hardware and software architecture, Low Power
    Energy Aware Processing (LEAP), has been developed. In this article,
    we described the LEAP design approach, in which the system is able to
    adaptively select the most energy-efficient hardware components
    matching an application's needs. The LEAP platform supports highly
    dynamic requirements in sensing fidelity, computational load, storage
    media, and network bandwidth. It focuses on episodic operation of each
    component and considers the energy dissipation for each platform task
    by integrating fine-grained energy-dissipation monitoring and
    sophisticated power-control scheduling for all subsystems, including
    sensors. In addition to the LEAP platform's unique hardware
    capabilities, its software architecture has been designed to provide
    an easy way to use power management interface and a robust,
    fault-tolerant operating environment and to enable remote upgrade of
    all software components. LEAP platform capabilities are demonstrated
    by example implementations, such as a network protocol design and a
    light source event detection algorithm. Through the use of a
    distributed node testbed, we demonstrate that by exploiting high
    energy-efficiency components and enabling proper on-demand scheduling,
    the LEAP architecture may meet both sensing performance and energy
    dissipation objectives for a broad class of applications.},
  acknowledgement = ack-nhfb,
  articleno = {27},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Wang:2012:DCR,
  author = {Weixun Wang and Prabhat Mishra and Ann Gordon-Ross},
  title = {Dynamic Cache Reconfiguration for Soft Real-Time Systems},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {28:1--28:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220340},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In recent years, efficient dynamic reconfiguration techniques
    have been widely employed for system optimization. Dynamic cache
    reconfiguration is a promising approach for reducing energy
    consumption as well as for improving overall system performance. It is
    a major challenge to introduce cache reconfiguration into real-time
    multitasking systems, since dynamic analysis may adversely affect
    tasks with timing constraints. This article presents a novel approach
    for implementing cache reconfiguration in soft real-time systems by
    efficiently leveraging static analysis during runtime to minimize
    energy while maintaining the same service level. To the best of our
    knowledge, this is the first attempt to integrate dynamic cache
    reconfiguration in real-time scheduling techniques. Our experimental
    results using a wide variety of applications have demonstrated that
    our approach can significantly reduce the cache energy consumption in
    soft real-time systems (up to 74\%).},
  acknowledgement = ack-nhfb,
  articleno = {28},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Palermo:2012:VAR,
  author = {Gianluca Palermo and Cristina Silvano and Vittorio Zaccaria},
  title = {A Variability-Aware Robust Design Space Exploration Methodology
    for On-Chip Multiprocessors Subject to Application-Specific
    Constraints},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {29:1--29:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220341},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Manufacturing process variation is dramatically becoming one
    of the most important challenges related to power and performance
    optimization for sub-90nm CMOS technologies. Process variability
    impacts the optimization of the target system metrics, that is,
    performance and energy consumption by introducing fluctuations and
    unpredictability. Besides, it impacts the parametric yield of the chip
    with respect to application level constraints by reducing the number
    of devices working within normal operating conditions. The impact of
    variability on systems with stringent application-specific
    requirements (such as portable multimedia and critical embedded
    systems) is much greater than on general-purpose systems given the
    emphasis on predictability and reduced operating margins. In this
    market segment, failing to address such a problem within the early
    design stages of the chip may lead to missing market deadlines and
    suffering greater economic losses. In the context of a design space
    exploration framework for supporting the platform-based design
    approach, we address the problem of robustness with respect to
    manufacturing process variations. First, we apply Response Surface
    Modeling (RSM) techniques to enable an efficient evaluation of the
    statistical measures of execution time and energy consumption for each
    system configuration. Then, we apply a robust design space exploration
    framework to afford the problem of the impact of manufacturing process
    variations onto the system-level metrics and consequently onto the
    application-level constraints. We finally provide a comparison of our
    design space exploration technique with conventional approaches on two
    different case studies.},
  acknowledgement = ack-nhfb,
  articleno = {29},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Yang:2012:UEP,
  author = {Yoon Seok Yang and Gwan Choi},
  title = {Unequal Error Protection Based on {DVFS} for {JSCD} in Low-Power
    Portable Multimedia Systems},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {30:1--30:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220342},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {This article presents a low-power decoder design for joint
    source-channel decoding (JSCD) based on a novel unequal error
    protection (UEP) scheme over additive white Gaussian noise (AWGN)
    channels. Conventional JSCD schemes, adopting low-density parity check
    (LDPC) codes for multimedia devices, typically operate at a fixed-time
    decoding loop, regardless of the quality of data received. We present
    a JSCD scheme that achieves reduction in power through minimum energy
    decoding and dynamic voltage and frequency scaling (DVFS).
    Consequently, up to 39\% power reduction is achieved in Foreman,
    Akiyo, and Mobile video streams without performance degradation in
    reconstructed video quality.},
  acknowledgement = ack-nhfb,
  articleno = {30},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Namin:2012:EFF,
  author = {Ashkan Hosseinzadeh Namin and Huapeng Wu and Majid Ahmadi},
  title = {An Efficient Finite Field Multiplier Using Redundant
    Representation},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {31:1--31:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220343},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {An efficient word-level finite field multiplier using
    redundant representation is proposed. The proposed multiplier has a
    significantly higher speed, compared to previously proposed word-level
    architectures using either redundant representation or optimal normal
    basis type I, at the expense of moderately higher area complexity.
    Furthermore, the new design out-performs other similar proposals when
    considering the product of area and delay as a measure of performance.
    ASIC Realization of the proposed design using TSMC's .18 um CMOS
    technology for the binary field size of 163 is also presented.},
  acknowledgement = ack-nhfb,
  articleno = {31},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Leyva-del-Foyo:2012:ITI,
  author = {Luis E. Leyva-del-Foyo and Pedro Mejia-Alvarez and Dionisio de
    Niz},
  title = {Integrated Task and Interrupt Management for Real-Time Systems},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {32:1--32:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220344},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Real-time scheduling algorithms like RMA or EDF and their
    corresponding schedulability test have proven to be powerful tools for
    developing predictable real-time systems. However, the traditional
    interrupt management model presents multiple inconsistencies that
    break the assumptions of many of the real-time scheduling tests,
    diminishing its utility. In this article, we analyze these
    inconsistencies and present a model that resolves them by integrating
    interrupts and tasks in a single scheduling model. We then use the RMA
    theory to calculate the cost of the model and analyze the
    circumstances under which it can provide the most value. This model
    was implemented in a kernel module. The portability of the design of
    our module is discussed in terms of its independence from both the
    hardware and the kernel. We also discuss the implementation issues of
    the model over conventional PC hardware, along with its cost and novel
    optimizations for reducing the overhead. Finally, we present our
    experimental evaluation to show evidence of its temporal determinism
    and overhead.},
  acknowledgement = ack-nhfb,
  articleno = {32},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Garg:2012:IMP,
  author = {Siddharth Garg and Diana Marculescu},
  title = {On the Impact of Manufacturing Process Variations on the
    Lifetime of Sensor Networks},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {33:1--33:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220345},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The lifetime of individual nodes in a sensor network depends
    strongly on the leakage power of the nodes in idle state. With
    technology scaling, variability in leakage power dissipation of sensor
    nodes will cause increased variability in their lifetimes. In this
    article, we analyze how the lifetime variations of sensor nodes affect
    the performance of the sensor network as a whole. We demonstrate the
    use of the proposed framework to explore deployment cost versus
    performance trade-offs for sensor networks. Results indicate that up
    to 37\% improvement in the critical lifetime of a sensor network can
    be obtained with a 20\% increase in deployment cost.},
  acknowledgement = ack-nhfb,
  articleno = {33},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Blech:2012:GIB,
  author = {Jan Olaf Blech and Micha{\"e}l P{\'e}rin},
  title = {Generating Invariant-Based Certificates for Embedded Systems},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {34:1--34:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220346},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Automatic verification tools, such as model checkers and
    tools based on static analysis or on abstract interpretation, have
    become popular in software and hardware development. They increase
    confidence and potentially provide rich feedback. However, with
    increasing complexity, verification tools themselves are more likely
    to contain errors. In contrast to automatic verification tools,
    higher-order theorem provers use mathematically founded proof
    strategies checked by a small proof checker to guarantee selected
    properties. Thus, they enjoy a high level of trustability. Properties
    of software and hardware systems and their justifications can be
    encapsulated into a certificate, thereby guaranteeing correctness of
    the systems, with respect to the properties. These results offer a
    much higher degree of confidence than results achieved by verification
    tools. However, higher-order theorem provers are usually slow, due to
    their general and minimalistic nature. Even for small systems, a lot
    of human interaction is required for establishing a certificate. In
    this work, we combine the advantages of automatic verification tools
    (i.e., speed and automation) with those of higher-order theorem
    provers (i.e., high level of trustability). The verification tool
    generates a certificate for each invocation. This is checked by the
    higher-order theorem prover, thereby guaranteeing the desired
    property. The generation of certificates is much easier than producing
    the analysis results of the verification tool in the first place. In
    our work, we are able to create certificates that come with an
    algorithmic description of the proof of the desired property as
    justification. We concentrate on verification tools that generate
    invariants of systems and certify automatically that these do indeed
    hold. Our approach is applied to the certification of the verdicts of
    a deadlock-detection tool for an asynchronous component-based
    language.},
  acknowledgement = ack-nhfb,
  articleno = {34},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Jeong:2012:PLT,
  author = {Jaein Jeong and David Culler},
  title = {Predicting the Long-Term Behavior of a Micro-Solar Power System},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {35:1--35:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220347},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Micro-solar power system design is challenging because it
    must address long-term system behavior under highly variable solar
    energy conditions and consider a large space of design options.
    Several micro-solar power systems and models have been made,
    validating particular points in the whole design space. We provide a
    general architecture of micro-solar power systems---comprising key
    components and interconnections among the components---and formalize
    each component in an analytical or empirical model of its behavior. To
    model the variability of solar energy, we provide three solar
    radiation models, depending on the degree of information available: an
    astronomical model for ideal conditions, an obstructed astronomical
    model for estimating solar radiation under the presence of shadows and
    obstructions, and a weather-effect model for estimating solar
    radiation under weather variation. Our solar radiation models are
    validated with a concrete design, the HydroWatch node, thus achieving
    small deviation from the long-term measurement. They can be used in
    combination with other micro-solar system models to improve the
    utility of the load and estimate the behavior of micro-solar power
    systems more accurately. Thus, our solar radiation models provide more
    accurate estimations of solar radiation and close the loop for
    micro-solar power system modeling.},
  acknowledgement = ack-nhfb,
  articleno = {35},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Smith:2012:OSH,
  author = {Melissa C. Smith and Gregory D. Peterson},
  title = {Optimization of Shared High-Performance Reconfigurable Computing
    Resources},
  journal = j-TECS,
  volume = {11},
  number = {2},
  pages = {36:1--36:??},
  month = jul,
  year = {2012},
  coden = {????},
  doi = {http://dx.doi.org/10.1145/2220336.2220348},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  bibdate = {Fri Jul 27 18:57:33 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In the field of high-performance computing, systems harboring
    reconfigurable devices, such as field-programmable gate arrays
    (FPGAs), are gaining more widespread interest. Such systems range from
    supercomputers with tightly coupled reconfigurable hardware to
    clusters with reconfigurable devices at each node. The use of these
    architectures for scientific computing provides an alternative for
    computationally demanding problems and has advantages in metrics, such
    as operating cost/performance and power/performance. However,
    performance optimization of these systems can be challenging even with
    knowledge of the system's characteristics. Our analytic performance
    model includes parameters representing the reconfigurable hardware,
    application load imbalance across the nodes, background user load,
    basic message-passing communication, and processor heterogeneity. In
    this article, we provide an overview of the analytical model and
    demonstrate its application for optimization and scheduling of
    high-performance reconfigurable computing (HPRC) resources. We examine
    cost functions for minimum runtime and other optimization problems
    commonly found in shared computing resources. Finally, we discuss
    additional scheduling issues and other potential applications of the
    model.},
  acknowledgement = ack-nhfb,
  articleno = {36},
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Lee:2012:EEA,
  author =       "Kyoungwoo Lee and Nikil Dutt and Nalini
                 Venkatasubramanian",
  title =        "{EAVE}: {Error-Aware Video Encoding} Supporting
                 Extended Energy\slash {QoS} Trade-offs for Mobile
                 Embedded Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "37:1--37:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2220336.2220349",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy/QoS provisioning is challenging for video
                 applications over lossy wireless network with
                 power-constrained mobile handheld devices. In this
                 work, we exploit the inherent error tolerance of video
                 data to generate a range of acceptable operating points
                 by controlling the amount of errors in the system. In
                 particular, we propose an error-aware video encoding
                 technique, EAVE, that intentionally injects errors
                 while ensuring acceptable QoS. The expanded trade-off
                 space generated by EAVE allows system designers to
                 comparatively evaluate different operating points with
                 varying QoS and energy consumption by aggressively
                 exploiting error-resilience attributes, and could
                 potentially result in significant energy savings. The
                 novelty of our approach resides in active exploitation
                 of errors to vary the operating conditions for further
                 optimization of system parameters. Moreover, we present
                 the adaptivity of our approach by incorporating the
                 feedback from the decoding side to achieve the QoS
                 requirement under the dynamic network status. Our
                 experiments show that EAVE can reduce the energy
                 consumption for an encoding device by up to 37\% for a
                 video conferencing application over a wireless network
                 without quality degradation, compared to a standard
                 video encoding technique over test video streams.
                 Further, our experimental results demonstrate that EAVE
                 can expand the design space by 14 times with respect to
                 energy consumption and by 13 times with respect to
                 video quality (compared to a traditional approach
                 without active error exploitation) on average, over
                 test video streams.",
  acknowledgement = ack-nhfb,
  articleno =    "37",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Chen:2012:ART,
  author =       "Mingsong Chen and Prabhat Mishra and Dhrubajyoti
                 Kalita",
  title =        "Automatic {RTL} Test Generation from {SystemC TLM}
                 Specifications",
  journal =      j-TECS,
  volume =       "11",
  number =       "2",
  pages =        "38:1--38:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2220336.2220350",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jul 27 18:57:33 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "SystemC transaction-level modeling (TLM) is widely
                 used to enable early exploration for both hardware and
                 software designs. It can reduce the overall design and
                 validation effort of complex system-on-chip (SOC)
                 architectures. However, due to lack of automated
                 techniques coupled with limited reuse of validation
                 efforts between abstraction levels, SOC validation is
                 becoming a major bottleneck. This article presents a
                 novel top-down methodology for automatically generating
                 register transfer-level (RTL) tests from SystemC TLM
                 specifications. It makes two important contributions:
                 (i) it proposes a method that can automatically
                 generate TLM tests using various coverage metrics, and
                 (ii) it develops a test refinement specification for
                 automatically converting TLM tests to RTL tests in
                 order to reduce overall validation effort. We have
                 developed a tool which incorporates these activities to
                 enable automated RTL test generation from SystemC TLM
                 specifications. Case studies using a router example and
                 a 64-bit Alpha AXP pipelined processor demonstrate that
                 our approach can achieve intended functional coverage
                 of the RTL designs, as well as capture various
                 functional errors and inconsistencies between
                 specifications and implementations.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Plaks:2012:ESS,
  author =       "Toomas P. Plaks",
  title =        "Editorial: Special Section on {CAPA'09}",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "39:1--39:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331148",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Paul:2012:PRC,
  author =       "Anand Paul and Yung-Chuan Jiang and Jhing-Fa Wang and
                 Jar-Ferr Yang",
  title =        "Parallel Reconfigurable Computing-Based Mapping
                 Algorithm for Motion Estimation in Advanced Video
                 Coding",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "40:1--40:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331149",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Computational load of motion estimation in advanced
                 video coding (AVC) standard is significantly high and
                 even worse for HDTV and super-resolution sequences. In
                 this article, a video processing algorithm is
                 dynamically mapped onto a new parallel reconfigurable
                 computing (PRC) architecture which consists of multiple
                 dynamic reconfigurable computing (DRC) units. First, we
                 construct a directed acyclic graph (DAG) to represent
                 video coding algorithms in which motion estimation is
                 the focus. A novel parallel partition approach is then
                 proposed to map motion estimation DAG onto the multiple
                 DRC units in a PRC system. This partitioning algorithm
                 is capable of design optimization of parallel
                 processing reconfigurable systems for a given number of
                 processing elements in different search ranges. This
                 speeds up the video processing with minimum
                 sacrifice.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Suris:2012:RSC,
  author =       "Jorge A. Sur{\'\i}s and Adolfo Recio and Peter
                 Athanas",
  title =        "{RapidRadio}: Signal Classification and Radio
                 Deployment Framework",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "41:1--41:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331151",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, the RapidRadio framework for signal
                 classification and receiver deployment is discussed.
                 The framework is a productivity-enhancing tool that
                 reduces the required knowledge base for implementing a
                 receiver on an FPGA-based SDR platform. The ultimate
                 objective of this framework is to identify unknown
                 signals and to build FPGA-based receivers capable of
                 receiving them. RapidRadio divides the process of radio
                 creation into two phases; the analysis phase and radio
                 synthesis phase. The analysis phase guides the user
                 through the process of classifying an unknown signal
                 and determining its modulation scheme and parameters,
                 resulting in a radio receiver model. In the second
                 phase, this model is transformed into a functional
                 receiver in an FPGA-based platform.",
  acknowledgement = ack-nhfb,
  articleno =    "41",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Mark:2012:HBC,
  author =       "Cindy Mark and Scott Y. L. Chin and Lesley Shannon and
                 Steven J. E. Wilton",
  title =        "Hierarchical Benchmark Circuit Generation for {FPGA}
                 Architecture Evaluation",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "42:1--42:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331152",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We describe a stochastic circuit generator that can be
                 used to automatically create benchmark circuits for use
                 in FPGA architecture studies. The circuits consist of a
                 hierarchy of interconnected modules, reflecting the
                 structure of circuits designed using a system-on-chip
                 design flow. Within each level of hierarchy, modules
                 can be connected in a bus, star, or dataflow
                 configuration. Our circuit generator is calibrated
                 based on a careful study of existing system-on-chip
                 circuits. We show that our benchmark circuits lead to
                 more realistic architectural conclusions than circuits
                 generated using previous generators.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Reardon:2012:REE,
  author =       "Casey Reardon and Brian Holland and Alan D. George and
                 Greg Stitt and Herman Lam",
  title =        "{RCML}: An Environment for Estimation Modeling of
                 Reconfigurable Computing Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "43:1--43:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331153",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Reconfigurable computing (RC) is emerging as a
                 promising area for embedded computing, in which complex
                 systems must balance performance, flexibility, cost,
                 and power. The difficulty associated with RC
                 development suggests improved strategic planning and
                 analysis techniques can save significant development
                 time and effort. This article presents a new abstract
                 modeling language and environment, the RC Modeling
                 Language (RCML), to facilitate efficient design space
                 exploration of RC systems at the estimation modeling
                 level, that is, before building a functional
                 implementation. Two integrated analysis tools and case
                 studies, one analytical and one simulative, are
                 presented illustrating relatively accurate automated
                 analysis of systems modeled in RCML.",
  acknowledgement = ack-nhfb,
  articleno =    "43",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{DiBiagio:2012:AOA,
  author =       "Andrea {Di Biagio} and Giovanni Agosta and Martino
                 Sykora and Cristina Silvano",
  title =        "Architecture Optimization of Application-Specific
                 Implicit Instructions",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "44:1--44:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331154",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Dynamic configuration of application-specific implicit
                 instructions has been proposed to better exploit the
                 available parallelism at the instruction level in
                 pipelined processors. The support of such implicit
                 instruction issue requires the pipeline to be extended
                 with a trigger table that describes the instruction
                 implicitly issued as a response to a value written into
                 a triggering register by a triggering instruction
                 (which may be an add or sub instruction). In this
                 article, we explore the design optimization of the
                 trigger table to maximize the number of instructions
                 that can be implicitly issued while keeping the limited
                 size of the trigger table. The concept of implicitly
                 issued instruction has been formally defined by
                 considering the inter-basic block analysis of control
                 and data dependencies. A compilation tool chain has
                 been developed to automatically identify the
                 optimization opportunities, taking into account the
                 constraints imposed by control and data dependencies as
                 well as by architectural limitations. The proposed
                 solutions have been applied to the case of a baseline
                 scalar MIPS processor where, for the selected set of
                 benchmarks (DSPStone and Mibench/automotive), we
                 obtained an average speedup of 17\%.",
  acknowledgement = ack-nhfb,
  articleno =    "44",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Napapetian:2012:ESS,
  author =       "Ani Nahapetian and William Kaiser and Majid
                 Sarrafzadeh",
  title =        "Editorial: Special Section on {WHS'09}",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "45:1--45:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331155",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "45",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Guenterberg:2012:ASR,
  author =       "Eric Guenterberg and Hassan Ghasemzadeh and Roozbeh
                 Jafari",
  title =        "Automatic Segmentation and Recognition in Body Sensor
                 Networks Using a Hidden {Markov} Model",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "46:1--46:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331156",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "One important application of body sensor networks is
                 action recognition. Action recognition often implicitly
                 requires partitioning sensor data into intervals, then
                 labeling the partitions according to the action that
                 each represents or as a non-action. The temporal
                 partitioning stage is called segmentation, and the
                 labeling is called classification. While many effective
                 methods exist for classification, segmentation remains
                 problematic. We present a technique inspired by
                 continuous speech recognition that combines
                 segmentation and classification using hidden Markov
                 models. This technique is distributed across several
                 sensor nodes. We show the results of this technique and
                 the bandwidth savings over full data transmission.",
  acknowledgement = ack-nhfb,
  articleno =    "46",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Pradhan:2012:AVJ,
  author =       "Gaurav N. Pradhan and B. Prabhakaran",
  title =        "Analyzing and Visualizing Jump Performance Using
                 Wireless Body Sensors",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "47:1--47:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331157",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Advancement in technology has led to the deployment of
                 body sensor networks (BSN) to monitor and sense human
                 activity in pervasive environments. Using multiple
                 wireless on-body systems, such as physiological data
                 monitoring and motion capture systems, body sensor
                 network data consists of heterogeneous physiologic and
                 motoric streams that form a multidimensional framework.
                 In this article, we analyze such high-dimensional body
                 sensor network data by proposing an efficient,
                 multidimensional factor analysis technique for
                 quantifying human performance and, at the same time,
                 providing visualization for performances of
                 participants in a low-dimensional space for easier
                 interpretation.",
  acknowledgement = ack-nhfb,
  articleno =    "47",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Thatte:2012:KEE,
  author =       "Gautam Thatte and Ming Li and Sangwon Lee and Adar
                 Emken and Shrikanth Narayanan and Urbashi Mitra and
                 Donna Spruijt-Metz and Murali Annavaram",
  title =        "{KNOWME}: An Energy-Efficient Multimodal Body Area
                 Network for Physical Activity Monitoring",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "48:1--48:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331158",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The use of biometric sensors for monitoring an
                 individual's health and related behaviors, continuously
                 and in real time, promises to revolutionize healthcare
                 in the near future. In an effort to better understand
                 the complex interplay between one's medical condition
                 and social, environmental, and metabolic parameters,
                 this article presents the KNOWME platform, a complete,
                 end-to-end, body area sensing system that integrates
                 off-the-shelf biometric sensors with a Nokia N95 mobile
                 phone to continuously monitor the metabolic signals of
                 a subject. With a current focus on pediatric obesity,
                 KNOWME employs metabolic signals to monitor and
                 evaluate physical activity. KNOWME development and
                 in-lab deployment studies have revealed three major
                 challenges: (1) the need for robustness to highly
                 varying operating environments due to subject-induced
                 variability, such as mobility or sensor placement; (2)
                 balancing the tension between achieving high fidelity
                 data collection and minimizing network energy
                 consumption; and (3) accurate physical activity
                 detection using a modest number of sensors. The KNOWME
                 platform described herein directly addresses these
                 three challenges. Design robustness is achieved by
                 creating a three-tiered sensor data collection
                 architecture. The system architecture is designed to
                 provide robust, continuous, multichannel data
                 collection and scales without compromising normal
                 mobile device operation. Novel physical activity
                 detection methods which exploit new representations of
                 sensor signals provide accurate and efficient physical
                 activity detection. The physical activity detection
                 method employs personalized training phases and
                 accounts for intersession variability. Finally,
                 exploiting the features of the hardware implementation,
                 a low-complexity sensor sampling algorithm is
                 developed, resulting in significant energy savings
                 without loss of performance.",
  acknowledgement = ack-nhfb,
  articleno =    "48",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Banerjee:2012:BAT,
  author =       "Ayan Banerjee and Sailesh Kandula and Tridib Mukherjee
                 and Sandeep K. S. Gupta",
  title =        "{BAND-AiDe}: a Tool for Cyber-Physical Oriented
                 Analysis and Design of Body Area Networks and Devices",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "49:1--49:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331159",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Body area networks (BANs) are networks of medical
                 devices implanted within or worn on the human body.
                 Analysis and verification of BAN designs require (i)
                 early feedback on the BAN design and (ii)
                 high-confidence evaluation of BANs without requiring
                 any hazardous, intrusive, and costly deployment. Any
                 design of BAN further has to ensure (i) the safety of
                 the human body, that is, limiting any undesirable
                 side-effects (e.g., heat dissipation) of BAN operations
                 (involving sensing, computation, and communication
                 among the devices) on the human body, and (ii) the
                 sustainability of the BAN operations, that is, the
                 continuation of the operations under constrained
                 resources (e.g., limited battery power in the devices)
                 without requiring any redeployments. This article uses
                 the Model Based Engineering (MBE) approach to perform
                 design and analysis of BANs. In this regard, first, an
                 abstract cyber-physical model of BANs, called BAN-CPS,
                 is proposed that captures the undesirable side-effects
                 of the medical devices (cyber) on the human body
                 (physical); second, a design and analysis tool, named
                 BAND-AiDe, is developed that allows specification of
                 BAN-CPS using industry standard Abstract Architecture
                 Description Language (AADL) and enables safety and
                 sustainability analysis of BANs; and third, the
                 applicability of BAND-AiDe is shown through a case
                 study using both single and a network of medical
                 devices for health monitoring applications.",
  acknowledgement = ack-nhfb,
  articleno =    "49",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Hanson:2012:AFE,
  author =       "Mark A. Hanson and Harry C. {Powell, Jr.} and Adam T.
                 Barth and John Lach",
  title =        "Application-Focused Energy-Fidelity Scalability for
                 Wireless Motion-Based Health Assessment",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "50:1--50:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331160",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Energy-fidelity trade-offs are central to the
                 performance of many technologies, but they are
                 essential in wireless body area sensor networks (BASNs)
                 due to severe energy and processing constraints and the
                 critical nature of certain healthcare applications.
                 On-node signal processing and compression techniques
                 can save energy by greatly reducing the amount of data
                 transmitted over the wireless channel, but lossy
                 techniques, capable of high compression ratios, can
                 incur a reduction in application fidelity. In order to
                 maximize system performance, these trade-offs must be
                 considered at runtime due to the dynamic nature of BASN
                 applications, including sensed data, operating
                 environments, user actuation, etc. BASNs therefore
                 require energy-fidelity scalability, so automated and
                 user-initiated trade-offs can be made dynamically. This
                 article presents a data rate scalability framework
                 within a motion-based health application context which
                 demonstrates the design of efficient and efficacious
                 wireless health systems.",
  acknowledgement = ack-nhfb,
  articleno =    "50",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Boulis:2012:IWC,
  author =       "Athanassios Boulis and Yuriy Tselishchev and Lavy
                 Libman and David Smith and Leif Hanlen",
  title =        "Impact of Wireless Channel Temporal Variation on {MAC}
                 Design for Body Area Networks",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "51:1--51:??",
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331161",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We investigate the impact of wireless channel temporal
                 variations on the design of medium access control (MAC)
                 protocols for body area networks (BANs). Our
                 measurements-based channel model captures large and
                 small time-scale signal correlations, giving an
                 accurate picture of the signal variation, specifically,
                 the deep fades which are the features that mostly
                 affect the behavior of the MAC. We test the effect of
                 the channel model on the performance of the 802.15.4
                 MAC both in contention access mode and TDMA access
                 mode. We show that there are considerable differences
                 in the performance of the MAC compared to simulations
                 that do not model channel temporal variation.
                 Furthermore, explaining the behavior of the MAC under a
                 temporal varying channel, we can suggest specific
                 design choices for the emerging BAN MAC standard.",
  acknowledgement = ack-nhfb,
  articleno =    "51",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Fainekos:2012:ESS,
  author =       "Georgios Fainekos and Eric Goubault and Franjo
                 Ivan{\v{c}}i{\'c} and Sriram Sankaranarayanan",
  title =        "Editorial: Special Section {VCPSS'09}",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "52:1--52:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331162",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "52",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Wongpiromsarn:2012:VPC,
  author =       "Tichakorn Wongpiromsarn and Sayan Mitra and Andrew
                 Lamperski and Richard M. Murray",
  title =        "Verification of Periodically Controlled Hybrid
                 Systems: Application to an Autonomous Vehicle",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "53:1--53:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331163",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article introduces Periodically Controlled Hybrid
                 Automata (PCHA) for modular specification of embedded
                 control systems. In a PCHA, control actions that change
                 the control input to the plant occur roughly
                 periodically, while other actions that update the state
                 of the controller may occur in the interim. Such
                 actions could model, for example, sensor updates and
                 information received from higher-level planning modules
                 that change the set point of the controller. Based on
                 periodicity and subtangential conditions, a new
                 sufficient condition for verifying invariant properties
                 of PCHAs is presented. For PCHAs with polynomial
                 continuous vector fields, it is possible to check these
                 conditions automatically using, for example, quantifier
                 elimination or sum of squares decomposition. We examine
                 the feasibility of this automatic approach on a small
                 example. The proposed technique is also used to
                 manually verify safety and progress properties of a
                 fairly complex planner-controller subsystem of an
                 autonomous ground vehicle. Geometric properties of
                 planner-generated paths are derived which guarantee
                 that such paths can be safely followed by the
                 controller.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Girard:2012:VSL,
  author =       "Antoine Girard and Gang Zheng",
  title =        "Verification of Safety and Liveness Properties of
                 Metric Transition Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "54:1--54:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331164",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "We consider verification problems for transition
                 systems enriched with a metric structure. We believe
                 that these metric transition systems are particularly
                 suitable for the analysis of cyber-physical systems in
                 which metrics can be naturally defined on the numerical
                 variables of the embedded software and on the
                 continuous states of the physical environment. We
                 consider verification of bounded and unbounded safety
                 properties, as well as bounded liveness properties. The
                 transition systems we consider are nondeterministic,
                 finitely branching, and with a finite set of initial
                 states. Therefore, bounded safety/liveness properties
                 can always be verified by exhaustive exploration of the
                 system trajectories. However, this approach may be
                 intractable in practice, as the number of trajectories
                 usually grows exponentially with respect to the
                 considered bound. Furthermore, since the system we
                 consider can have an infinite set of states, exhaustive
                 exploration cannot be used for unbounded safety
                 verification. For bounded safety properties, we propose
                 an algorithm which combines exploration of the system
                 trajectories and state space reduction using merging
                 based on a bisimulation metric. The main novelty
                 compared to an algorithm presented recently by Lerda et
                 al. [2008] consists in introducing a tuning parameter
                 that improves the performance drastically. We also
                 establish a procedure that allows us to prove unbounded
                 safety from the result of the bounded safety algorithm
                 via a refinement step. We then adapt the algorithm to
                 handle bounded liveness verification. Finally, the
                 effectiveness of the approach is demonstrated by
                 applying it to the analysis of implementations of an
                 embedded control loop.",
  acknowledgement = ack-nhfb,
  articleno =    "54",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Seshia:2012:QAS,
  author =       "Sanjit A. Seshia and Alexander Rakhlin",
  title =        "Quantitative Analysis of Systems Using Game-Theoretic
                 Learning",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "55:1--55:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331165",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The analysis of quantitative properties, such as
                 timing and power, is central to the design of reliable
                 embedded software and systems. However, the
                 verification of such properties on a program is made
                 difficult by their heavy dependence on the program's
                 environment, such as the processor it runs on. Modeling
                 the environment by hand can be tedious, error prone,
                 and time consuming. In this article, we present a new
                 game-theoretic approach to analyzing quantitative
                 properties that is based on performing systematic
                 measurements to automatically learn a model of the
                 environment. We model the problem as a game between our
                 algorithm (player) and the environment of the program
                 (adversary) in which the player seeks to accurately
                 predict the property of interest, while the adversary
                 sets environment states and parameters. To solve this
                 problem, we employ a randomized strategy that
                 repeatedly tests the program along a linear-sized set
                 of program paths called basis paths, using the
                 resulting measurements to infer a weighted-graph model
                 of the environment from which quantitative properties
                 can be predicted. Test cases are automatically
                 generated using satisfiability modulo theories (SMT)
                 solving. We prove that our algorithm can, under certain
                 assumptions and with arbitrarily high probability,
                 accurately predict properties such as worst-case
                 execution time or estimate the distribution of
                 execution times. Experimental results for execution
                 time analysis demonstrate that our approach is
                 efficient, accurate, and highly portable.",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Wu:2012:MCB,
  author =       "Lan Wu and Wei Zhang",
  title =        "A Model Checking Based Approach to Bounding Worst-Case
                 Execution Time for Multicore Processors",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "56:1--56:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331166",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "As multicore processors are increasingly adopted in
                 industry, it has become a great challenge to accurately
                 bound the worst-case execution time (WCET) for
                 real-time systems running on multicore chips. This is
                 particularly true because of the inter-thread
                 interferences in accessing shared resources on
                 multicores, such as shared L2 caches, which can
                 significantly affect the performance but are very
                 difficult to be estimated statically. This article
                 proposes an approach to analyzing WCET for multicore
                 processors with shared L2 instruction caches by using a
                 model checking based method. We model each concurrent
                 real-time thread, including the inter-thread cache
                 interferences with a PROMELA process, and derive the
                 WCET by using a binary search algorithm. To reduce the
                 state explosion problem, we propose several techniques
                 for reducing the memory consumption by exploiting
                 domain-specific information. Our experiments indicate
                 that compared to the static analysis technique based on
                 extended ILP (integer linear programming), our approach
                 improves the tightness of WCET estimation by more than
                 31.1\% for the benchmarks we studied. However, due to
                 the inherent complexity of multicore timing analysis
                 and the state explosion problem, the model checking
                 based approach currently can only work with small
                 real-time kernels for dual-core processors.",
  acknowledgement = ack-nhfb,
  articleno =    "56",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Tang:2012:UMS,
  author =       "Qinghui Tang and Sandeep K. S. Gupta and Georgios
                 Varsamopoulos",
  title =        "A Unified Methodology for Scheduling in Distributed
                 Cyber-Physical Systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "57:1--57:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331167",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A distributed cyber-physical system (DCPS) may receive
                 and induce energy-based interference to and from its
                 environment. This article presents a model and an
                 associated methodology that can be used to (i) schedule
                 tasks in DCPSs to ensure that the thermal effects of
                 the task execution are within acceptable levels, and
                 (ii) verify that a given schedule meets the
                 constraints. The model uses coarse discretization of
                 space and linearity of interference. The methodology
                 involves characterizing the interference of the task
                 execution and fitting it into the model, then using the
                 fitted model to verify a solution or explore the
                 solution space.",
  acknowledgement = ack-nhfb,
  articleno =    "57",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Nghiem:2012:TTI,
  author =       "Truong Nghiem and George J. Pappas and Rajeev Alur and
                 Antoine Girard",
  title =        "Time-Triggered Implementations of Dynamic
                 Controllers",
  journal =      j-TECS,
  volume =       "11",
  number =       "S2",
  pages =        "58:1--58:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2331147.2331168",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Sep 6 09:57:10 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Bridging the gap between model-based design and
                 platform-based implementation is one of the critical
                 challenges for embedded software systems. In the
                 context of embedded control systems that interact with
                 an environment, a variety of errors due to
                 quantization, delays, and scheduling policies may
                 generate executable code that does not faithfully
                 implement the model-based design. In this article, we
                 show that the performance gap between the model-level
                 semantics of linear dynamic controllers, for example,
                 the proportional-integral-derivative (PID) controllers
                 and their implementation-level semantics, can be
                 rigorously quantified if the controller implementation
                 is executed on a predictable time-triggered
                 architecture. Our technical approach uses lifting
                 techniques for periodic time-varying linear systems in
                 order to compute the exact error between the model
                 semantics and the execution semantics. Explicitly
                 computing the impact of the implementation on overall
                 system performance allows us to compare and partially
                 order different implementations with various scheduling
                 or timing characteristics.",
  acknowledgement = ack-nhfb,
  articleno =    "58",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Dong:2012:UAS,
  author = {Qi Dong and Donggang Liu},
  title = {Using Auxiliary Sensors for Pairwise Key Establishment
    in {WSN}},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  volume = {11},
  number = {3},
  articleno = {59},
  pages = {59:1--59:??},
  month = sep,
  year = {2012},
  doi = {http://dx.doi.org/10.1145/2345770.2345771},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  abstract = {Many techniques have been developed recently for
    establishing pairwise keys in sensor networks. However,
    some of them are vulnerable to a few compromised sensor
    nodes, while others could involve expensive protocols
    for establishing keys. This article introduces a much
    better alternative that can achieve both high
    resilience to node compromises and high efficiency in
    key establishment. The main idea is to deploy a small
    number of additional sensor nodes, called assisting
    nodes, to help key establishment between sensor nodes.
    The proposed approach has many advantages over existing
    approaches. In particular, a sensor node only needs to
    make a few local communications and perform a few
    efficient hash operations to setup a key with any other
    sensor node in the network at a very high probability.
    The majority of sensor nodes only need to store a
    single key. Besides, it also provides high resilience
    to node compromises. The theoretical analysis,
    simulation studies, and experiments on TelosB sensor
    motes also demonstrate the advantages of this key
    establishment protocol in sensor networks.},
  bibdate = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Arora:2012:ILM,
  author = {Divya Arora and Najwa Aaraj and Anand Raghunathan and
    Niraj K. Jha},
  title = {{INVISIOS}: a Lightweight, Minimally Intrusive Secure
    Execution Environment},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  volume = {11},
  number = {3},
  articleno = {60},
  pages = {60:1--60:??},
  month = sep,
  year = {2012},
  doi = {http://dx.doi.org/10.1145/2345770.2345772},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  abstract = {Many information security attacks exploit
    vulnerabilities in ``trusted'' and privileged software
    executing on the system, such as the operating system
    (OS). On the other hand, most security mechanisms
    provide no immunity to security-critical user
    applications if vulnerabilities are present in the
    underlying OS. While technologies have been proposed
    that facilitate isolation of security-critical
    software, they require either significant computational
    resources and are hence not applicable to many
    resource-constrained embedded systems, or necessitate
    extensive redesign of the underlying processors and
    hardware. In this work, we propose INVISIOS: a
    lightweight, minimally intrusive hardware-software
    architecture to make the execution of security-critical
    software invisible to the OS, and hence protected from
    its vulnerabilities. The INVISIOS software architecture
    encapsulates the security-critical software into a
    self-contained software module. While this module is
    part of the kernel and is run with kernel-level
    privileges, its code, data, and execution are
    transparent to and protected from the rest of the
    kernel. The INVISIOS hardware architecture consists of
    simple add-on hardware components that are responsible
    for bootstrapping the secure core, ensuring that it is
    exercised by applications in only permitted ways, and
    enforcing the isolation of its code and data. We
    implemented INVISIOS by enhancing a full-system
    emulator and Linux to model the proposed software and
    hardware enhancements, and applied it to protect a
    commercial cryptographic library. Our experiments
    demonstrate that INVISIOS is capable of facilitating
    secure execution at very small overheads, making it
    suitable for resource-constrained embedded systems and
    systems-on-chip.},
  bibdate = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Izosimov:2012:SOF,
  author = {Viacheslav Izosimov and Paul Pop and Petru Eles and
    Zebo Peng},
  title = {Scheduling and Optimization of Fault-Tolerant Embedded
    Systems with Transparency\slash Performance
    Trade-Offs},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  volume = {11},
  number = {3},
  articleno = {61},
  pages = {61:1--61:??},
  month = sep,
  year = {2012},
  doi = {http://dx.doi.org/10.1145/2345770.2345773},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  abstract = {In this article, we propose a strategy for the
    synthesis of fault-tolerant schedules and for the
    mapping of fault-tolerant applications. Our techniques
    handle transparency/performance trade-offs and use the
    fault-occurrence information to reduce the overhead due
    to fault tolerance. Processes and messages are
    statically scheduled, and we use process reexecution
    for recovering from multiple transient faults. We
    propose a fine-grained transparent recovery, where the
    property of transparency can be selectively applied to
    processes and messages. Transparency hides the recovery
    actions in a selected part of the application so that
    they do not affect the schedule of other processes and
    messages. While leading to longer schedules,
    transparent recovery has the advantage of both improved
    debuggability and less memory needed to store the
    fault-tolerant schedules.},
  bibdate = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Yang:2012:PAA,
  author = {Shengqi Yang and Pallav Gupta and Marilyn Wolf and
    Dimitrios Serpanos and Vijaykrishnan Narayanan and Yuan
    Xie},
  title = {Power Analysis Attack Resistance Engineering by
    Dynamic Voltage and Frequency Scaling},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  volume = {11},
  number = {3},
  articleno = {62},
  pages = {62:1--62:??},
  month = sep,
  year = {2012},
  doi = {http://dx.doi.org/10.1145/2345770.2345774},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  abstract = {This article proposes a novel approach to cryptosystem
    design to prevent power analysis attacks. Such attacks
    infer program behavior by continuously monitoring the
    power supply current going into the processor core.
    They form an important class of security attacks. Our
    approach is based on dynamic voltage and frequency
    scaling (DVFS), which hides processor state to make it
    harder for an attacker to gain access to a secure
    system. Three designs are studied to test the efficacy
    of the DVFS method against power analysis attacks. The
    advanced realization of our cryptosystem is presented
    which achieves enough high power and time trace
    entropies to block various kinds of power analysis
    attacks in the DES algorithm. We observed 27\% energy
    reduction and 16\% time overhead in these algorithms.
    Finally, DVFS hardness analysis is presented.},
  bibdate = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@article{Shokry:2012:HSS,
  author = {Hesham Shokry and Hatem M. El-Boghdadi},
  title = {On Heuristic Solutions to the Simple Offset Assignment
    Problem in Address-Code Optimization},
  journal = j-TECS,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  volume = {11},
  number = {3},
  articleno = {63},
  pages = {63:1--63:??},
  month = sep,
  year = {2012},
  doi = {http://dx.doi.org/10.1145/2345770.2345775},
  issn = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l = {1539-9087},
  coden = {????},
  abstract = {The increasing demand for more functionality in
    embedded systems applications nowadays requires
    efficient generation of compact code for embedded DSP
    processors. Because such processors have highly
    irregular data-paths, compilers targeting those
    processors are challenged with the automatic generation
    of optimized code with competent quality comparable to
    hand-crafted code. A major issue in code-generation is
    to optimize the placement of program variables in ROM
    relative to each other so as to reduce the overhead
    instructions dedicated for address computations. Modern
    DSP processors are typically shipped with a feature
    called Address Generation Unit (AGU) that provides
    efficient address-generation instructions for accessing
    program variables. Compilers targeting those processors
    are expected to exploit the AGU to optimize variables
    assignment. This article focuses on one of the basic
    offset-assignment problems; the Simple Offset
    Assignment (SOA) problem, where the AGU has only one
    Address Register and no Modify Registers. The notion of
    Tie-Break Function, TBF, introduced by Leupers and
    Marwedel [1996], has been used to guide the placement
    of variables in memory. In this article, we introduce a
    more effective form of the TBF; the Effective
    Tie-Breaking Function, ETBF, and show that the ETBF is
    better at guiding the variables placement process.
    Underpinning ETBF is the fact that program variables
    are placed in memory in sequence, with each variable
    having only two neighbors. We applied our technique to
    randomly generated graphs as well as to real-world code
    from the OffsetStone testbench [2010]. In previous
    work [Ali et al. 2008], our technique showed up to 7\%
    reduction in overhead when applied to
    randomly-generated problem instances. We report in this
    article on a further experiment of our technique on
    real-code from the OffsetStone testbench. Despite the
    substantial improvement our technique has achieved when
    applied to random problem instances, we found that it
    shows slight overhead reduction when applied to
    real-world instances in OffsetStone, which agrees with
    similar existing experiments. We analyze these results
    and show that the ETBF defaults to TBF.},
  bibdate = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
}

@Article{Girodias:2012:IMO,
  author =       "Bruno Girodias and Luiza Gheorghe Iugan and Youcef
                 Bouchebaba and Gabriela Nicolescu and El Mostapha
                 Aboulhamid and Michel Langevin and Pierre Paulin",
  title =        "Integrating Memory Optimization with Mapping
                 Algorithms for Multi-Processors System-on-Chip",
  journal =      j-TECS,
  volume =       "11",
  number =       "3",
  pages =        "64:1--64:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2345770.2345776",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 22 10:44:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Due to their great ability to parallelize at a very
                 high integration level, Multi-Processors
                 Systems-on-Chip (MPSoCs) are good candidates for
                 systems and applications such as multimedia. Memory is
                 becoming a key player for significant improvements in
                 these applications (power, performance and area). The
                 large amount of data manipulated by these applications
                 requires high-capacity computing and memory. Lately,
                 new programming models have been introduced. This leads
                 to the need of new optimization and mapping techniques
                 suitable for embedded systems and their programming
                 models. This article presents novel approaches for
                 combining memory optimization with mapping of
                 data-driven applications while considering
                 anti-dependence conflicts. Two different approaches are
                 studied and integrated with existing mapping
                 algorithms. The first approach (based on heuristic
                 algorithms) keeps the graph transformation for memory
                 optimization stage from the mapping stage and enables
                 their combination in a design flow. The second approach
                 (based on evolutionary algorithms) combines these two
                 stages and integrates them in a unique stage. Some
                 significant improvements are obtained for memory gain,
                 communication load and physical links.",
  acknowledgement = ack-nhfb,
  articleno =    "64",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

%%% TECS 11(3), article 65, 2012.
@Article{Zhong:2012:SNL,
  author = {Ziguo Zhong and Tian He},
  title = {Sensor Node Localization with Uncontrolled Events},
  journal = j-TECS,
  volume = 11,
  number = 3,
  pages = {65:1--65:??},
  month = sep,
  year = 2012,
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/2345770.2345777},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Event-driven localization has been proposed as a
    low-cost solution for node positioning in wireless
    sensor networks. In order to eliminate the costly
    requirement for accurate event control in existing
    methods, we present a practical design using
    uncontrolled events. The main idea is to estimate both
    event generation parameters and the location of sensor
    nodes simultaneously, by processing node sequences that
    can be easily obtained from event detections. Besides
    the basic design, we proposed two enhancements to
    further extract information embedded in node orderings
    for two scenarios: (i) node density is high; and (ii)
    abundant events are available. To demonstrate the
    generality of our design, both straight-line scan and
    circular wave propagation events are addressed in the
    article, and we evaluated the design with extensive
    simulation as well as a testbed implementation with 41
    MICAz motes. Results show that with only randomly
    generated events, our design can effectively localize
    nodes with great flexibility while adding little extra
    cost at the resource constrained sensor node side. In
    addition, localization via uncontrolled events provides
    a potential option of achieving node positioning
    through long-term ambient events.},
  acknowledgement = ack-nhfb,
  articleno = 65,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% TECS 11(3), article 66, 2012.
@Article{Kumar:2012:ECI,
  author = {Karthik Kumar and Yamini Nimmagadda and Yung-Hsiang
    Lu},
  title = {Energy Conservation for Image Retrieval on Mobile
    Systems},
  journal = j-TECS,
  volume = 11,
  number = 3,
  pages = {66:1--66:??},
  month = sep,
  year = 2012,
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/2345770.2345779},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Mobile systems such as PDAs and cell phones play an
    increasing role in handling visual contents such as
    images. Thousands of images can be stored in a mobile
    system with the advances in storage technology: this
    creates the need for better organization and retrieval
    of these images. Content Based Image Retrieval (CBIR)
    is a method to retrieve images based on their visual
    contents. In CBIR, images are compared by matching
    their numerical representations called features; CBIR
    is computation and memory intensive and consumes
    significant amounts of energy. This article examines
    energy conservation for CBIR on mobile systems. We
    present three improvements to save energy while
    performing the computation on the mobile system:
    selective loading, adaptive loading, and caching
    features in memory. Using these improvements adaptively
    reduces the features to be loaded into memory for each
    search. The reduction is achieved by estimating the
    difficulty of the search. If the images in the
    collection are dissimilar, fewer features are
    sufficient; less computation is performed and energy
    can be saved. We also consider the effect of
    consecutive user queries and show how features can be
    cached in memory to save energy. We implement a CBIR
    algorithm on an HP iPAQ hw6945 and show that these
    improvements can save energy and allow CBIR to scale up
    to 50,000 images on a mobile system. We further
    investigate if energy can be saved by migrating parts
    of the computation to a server, called computation
    offloading. We analyze the impact of the wireless
    bandwidth, server speed, number of indexed images, and
    the number of image queries on the energy consumption.
    Using our scheme, CBIR can be made energy efficient
    under all conditions.},
  acknowledgement = ack-nhfb,
  articleno = 66,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% TECS 11(3), article 67, 2012.
@Article{Lee:2012:IMR,
  author = {Jaehwan John Lee and Xiang Xiao},
  title = {Instant Multiunit Resource Hardware Deadlock Detection
    Scheme for System-on-Chips},
  journal = j-TECS,
  volume = 11,
  number = 3,
  pages = {67:1--67:??},
  month = sep,
  year = 2012,
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/2345770.2345780},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Mon Oct 22 10:44:19 MDT 2012},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {In this article, a brand new method of determining
    deadlock is presented. Most previous deadlock detection
    methods are algorithmic in the sense that they usually
    leverage some forms of Resource Allocation Graph (RAG)
    representations and then algorithms are devised to
    manipulate such representations in order to detect
    deadlock using information contained in the graph.
    Different from all previous methods, the proposed
    method actualizes the RAG with a digital circuit and
    uses it as a token-transmitting network. By supplying
    special input signals (tokens) to the network and
    observing the output tokens from the network, it is
    easier to identify which process nodes are reachable
    from each resource node in the graph. Using the
    reachability information, deadlock can be detected
    immediately. The time required to obtain the
    reachability information is determined by how fast the
    combinational circuit operates. Compared with previous
    algorithmic methods, the proposed deadlock detection
    can be deemed instant. We show that the proposed method
    is an order of magnitude faster than the previous
    fastest hardware mechanism and several orders of
    magnitude faster than traditional software-based
    algorithms.},
  acknowledgement = ack-nhfb,
  articleno = 67,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% TECS 11(3), article 68, 2012.
@Article{Zappi:2012:NLP,
  author =       "Piero Zappi and Daniel Roggen and Elisabetta Farella
                 and Gerhard Tr{\"o}ster and Luca Benini",
  title =        "Network-Level Power-Performance Trade-Off in Wearable
                 Activity Recognition: a Dynamic Sensor Selection
                 Approach",
  journal =      j-TECS,
  volume =       "11",
  number =       "3",
  pages =        "68:1--68:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2345770.2345781",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 22 10:44:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Wearable gesture recognition enables context aware
                 applications and unobtrusive HCI. It is realized by
                 applying machine learning techniques to data from
                 on-body sensor nodes. We present a gesture recognition
                 system minimizing power while maintaining a run-time
                 application defined performance target through dynamic
                 sensor selection. Compared to the non managed approach
                 optimized for recognition accuracy (95\% accuracy), our
                 technique can extend network lifetime by 4 times with
                 accuracy {$>$}90\% and by 9 times with accuracy
                 {$>$}70\%. We characterize the approach and outline its
                 applicability to other scenarios.",
  acknowledgement = ack-nhfb,
  articleno =    "68",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

%%% TECS 11(3), article 69, 2012.
@Article{Ambrose:2012:RII,
  author =       "Jude A. Ambrose and Roshan G. Ragel and Sri
                 Parameswaran",
  title =        "Randomized Instruction Injection to Counter Power
                 Analysis Attacks",
  journal =      j-TECS,
  volume =       "11",
  number =       "3",
  pages =        "69:1--69:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2345770.2345782",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Mon Oct 22 10:44:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Side-channel attacks in general and power analysis
                 attacks in particular are becoming a major security
                 concern in embedded systems. Countermeasures proposed
                 against power analysis attacks are data and table
                 masking, current flattening, dummy instruction
                 insertion and bit-flips balancing. All these techniques
                 are either susceptible to multi-order power analysis
                 attack, not sufficiently generic to cover all
                 encryption algorithms, or burden the system with high
                 area, run-time or energy cost. In this article, we
                 propose a randomized instruction injection technique
                 (RIJID) that overcomes the pitfalls of previous
                 countermeasures. RIJID scrambles the power profile of a
                 cryptographic application by injecting random
                 instructions at random points of execution and
                 therefore protects the system against power analysis
                 attacks. Two different ways of triggering the
                 instruction injection are also presented: (1)
                 softRIJID, a hardware/software approach, where special
                 instructions are used in the code for triggering the
                 injection at runtime; and (2) autoRIJID, a hardware
                 approach, where the code injection is triggered by the
                 processor itself via detecting signatures of encryption
                 routines at runtime. A novel signature detection
                 technique is also introduced for identifying encryption
                 routines within application programs at runtime.
                 Further, a simple obfuscation metric (RIJIDindex)
                 based on cross-correlation that measures the scrambling
                 provided by any code injection technique is introduced,
                 which coarsely indicates the level of scrambling
                 achieved. Our processor models cost 1.9\% additional
                 area in the hardware/software approach and 1.2\% in the
                 hardware approach for a RISC based processor, and costs
                 on average 29.8\% in runtime and 27.1\% in energy for
                 the former and 25.0\% in runtime and 28.5\% in energy
                 for the latter, for industry standard cryptographic
                 applications.",
  acknowledgement = ack-nhfb,
  articleno =    "69",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

%%% TECS 11(4), article 70, 2012.
@Article{Pimentel:2012:ISS,
  author = {Andy D. Pimentel and Naehyuck Chang and Mladen
    Berekovic},
  title = {Introduction to special section {ESTIMedia'09}},
  journal = j-TECS,
  volume = 11,
  number = 4,
  pages = {70:1--70:??},
  month = dec,
  year = 2012,
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/2362336.2362337},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  articleno = 70,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% TECS 11(4), article 71, 2012.
@Article{Paterna:2012:VTW,
  author = {Francesco Paterna and Andrea Acquaviva and Francesco
    Papariello and Giuseppe Desoli and Luca Benini},
  title = {Variability-tolerant workload allocation for {MPSoC}
    energy minimization under real-time constraints},
  journal = j-TECS,
  volume = 11,
  number = 4,
  pages = {71:1--71:??},
  month = dec,
  year = 2012,
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/2362336.2362338},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Sub-50nm CMOS technologies are affected by significant
    variability, which causes power and performance
    variations among nominally similar cores in MPSoC
    platforms. This undesired heterogeneity threatens
    execution predictability and energy efficiency. We
    propose two techniques to allocate sets of
    barrier-synchronized tasks. The first technique models
    allocation as an ILP and achieves optimal results, but
    requires an offline solver. The second technique adopts
    a two-stage heuristic approach, and it can be adapted
    to work online. We tested our approach on the virtual
    prototype of a next-generation industrial multicore
    platform. Experimental results demonstrate that our
    approach minimizes deadline violations while increasing
    energy efficiency.},
  acknowledgement = ack-nhfb,
  articleno = 71,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% TECS 11(4), article 72, 2012.
@Article{Tsutsui:2012:HTP,
  author = {Hiroshi Tsutsui and Koichi Hattori and Hiroyuki Ochi
    and Yukihiro Nakamura},
  title = {A high-throughput pipelined parallel architecture for
    {JPEG XR} encoding},
  journal = j-TECS,
  volume = 11,
  number = 4,
  pages = {72:1--72:??},
  month = dec,
  year = 2012,
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/2362336.2362339},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {JPEG XR is an emerging image coding standard, based on
    HD Photo developed by Microsoft Corporation. It
    supports high compression performance twice as high as
    the de facto image coding system, namely, JPEG, and
    also has an advantage over JPEG 2000 in terms of
    computational cost. JPEG XR is expected to be
    widespread for many devices including embedded systems
    in the near future. In this article, we propose a novel
    architecture for JPEG XR encoding. In previous
    architectures, entropy coding was the throughput
    bottleneck because it was implemented as a sequential
    algorithm to handle data with dependency. We found that
    there is no dependency in intra-macroblock data, and we
    could safely pipeline all the encoding processes
    including the entropy coding. In addition, each module
    of our architecture, which can be regarded as a
    pipeline stage, can be parallelized. As a result, our
    architecture can achieve 12.8 pixel/cycle at its
    maximum. To demonstrate our architecture, we designed
    three versions of our architecture with different
    degrees of parallelism of one, two, and four. Our
    four-way parallel architecture achieves 579 Mpixel/sec
    at 181MHz clock frequency for grayscale images.},
  acknowledgement = ack-nhfb,
  articleno = 72,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% TECS 11(4), article 73, 2012.
@Article{Kim:2012:XFM,
  author = {Minyoung Kim and Mark-Oliver Stehr and Carolyn Talcott
    and Nikil Dutt and Nalini Venkatasubramanian},
  title = {{xTune}: a formal methodology for cross-layer tuning
    of mobile embedded systems},
  journal = j-TECS,
  volume = 11,
  number = 4,
  pages = {73:1--73:??},
  month = dec,
  year = 2012,
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/2362336.2362340},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {Resource-limited mobile embedded systems can benefit
    greatly from dynamic adaptation of system parameters.
    We propose a novel approach that employs iterative
    tuning using lightweight formal verification at runtime
    with feedback for dynamic adaptation. One objective of
    this approach is to enable trade-off analysis across
    multiple layers (e.g., application, middleware, OS) and
    predict the possible property violations as the system
    evolves dynamically over time. Specifically, an
    executable formal specification is developed for each
    layer of the mobile system under consideration. The
    formal specification is then analyzed using statistical
    property checking and statistical quantitative
    analysis, to determine the impact of various resource
    management policies for achieving desired timing/QoS
    properties. Integration of formal analysis with dynamic
    behavior from system execution results in a feedback
    loop that enables model refinement and further
    optimization of policies and parameters. We demonstrate
    the applicability of this approach to the adaptive
    provisioning of resource-limited distributed real-time
    systems using a mobile multimedia case study.},
  acknowledgement = ack-nhfb,
  articleno = 73,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% TECS 11(4), article 74, 2012.
@Article{Dick:2012:ISS,
  author = {Robert Dick and Li Shang and Nikil Dutt},
  title = {Introduction to special section {SCPS'09}},
  journal = j-TECS,
  volume = 11,
  number = 4,
  pages = {74:1--74:??},
  month = dec,
  year = 2012,
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/2362336.2362341},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  articleno = 74,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% TECS 11(4), article 75, 2012.
@Article{Koutsoukos:2012:PAM,
  author = {Xenofon Koutsoukos and Nicholas Kottenstette and
    Joseph Hall and Emeka Eyisi and Heath Leblanc and
    Joseph Porter and Janos Sztipanovits},
  title = {A passivity approach for model-based compositional
    design of networked control systems},
  journal = j-TECS,
  volume = 11,
  number = 4,
  pages = {75:1--75:??},
  month = dec,
  year = 2012,
  CODEN = {????},
  DOI = {http://dx.doi.org/10.1145/2362336.2362342},
  ISSN = {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L = {1539-9087},
  bibdate = {Thu Jan 10 17:38:16 MST 2013},
  bibsource = {http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract = {The integration of physical systems through computing
    and networking has become pervasive, a trend now known
    as cyber-physical systems (CPS). Functionality in CPS
    emerges from the interaction of networked computational
    and physical objects. System design and integration are
    particularly challenging because fundamentally
    different physical and computational design concerns
    intersect. The impact of these interactions is the loss
    of compositionality which creates tremendous
    challenges. The key idea in this article is to use
    passivity for decoupling the control design of
    networked systems from uncertainties such as time
    delays and packet loss, thus providing a fundamental
    simplification strategy that limits the complexity of
    interactions. The main contribution is the application
    of the approach to an experimental case study of a
    networked multi-robot system. We present a networked
    control architecture that ensures the overall system
    remains stable in spite of implementation uncertainties
    such as network delays and data dropouts, focusing on
    the technical details required for the implementation.
    We describe a prototype domain-specific modeling
    language and automated code generation tools for the
    design of networked control systems on top of passivity
    that facilitate effective system configuration,
    deployment, and testing. Finally, we present
    experimental evaluation results that show decoupling of
    interlayer interactions.},
  acknowledgement = ack-nhfb,
  articleno = 75,
  fjournal = {ACM Transactions on Embedded Computing Systems},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

%%% TECS 11(4), article 76, 2012.
@Article{Shin:2012:CTC,
  author =       "Donghwa Shin and Jaehyun Park and Younghyun Kim and
                 Jaeam Seo and Naehyuck Chang",
  title =        "Control-theoretic cyber-physical system modeling and
                 synthesis: a case study of an active direct methanol
                 fuel cell",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "76:1--76:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362343",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "A joint optimization of the physical system and the
                 cyber world is one of the key problems in the design of
                 a cyber-physical system (CPS). The major mechanical
                 forces and/or chemical reactions in a plant are
                 commonly modified by actuators in the balance-of-plant
                 (BOP) system. More powerful actuators require more
                 power, but generally increase the response of the
                 physical system powered by the electrical energy
                 generated by the physical system. To maximize the
                 overall output of a power generating plant therefore
                 requires joint optimization of the physical system and
                 the cyber world, and this is a key factor in the design
                 of a CPS. We introduce a systematic approach to the
                 modeling and synthesis of a CPS that emphasize joint
                 power optimization, using an active direct methanol
                 fuel cell (DMFC) as a case study. Active DMFC systems
                 are superior to passive DMFCs in terms of fuel
                 efficiency thanks to their BOP system, which includes
                 pumps, air blowers, and fans. However, designing a
                 small-scale active DMFC with the best overall system
                 efficiency requires the BOP system to be jointly
                 optimized with the DMFC stack operation, because the
                 BOP components are powered by the stack. Our approach
                 to this synthesis problem involves (i) BOP system
                 characterization, (ii) integrated DMFC system modeling,
                 (iii) configuring a system for the maximum net power
                 output through design space exploration, (iv) synthesis
                 of feedback control tasks, and (v) implementation.",
  acknowledgement = ack-nhfb,
  articleno =    "76",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

%%% TECS 11(4), article 77, 2012.
%%% "SystemJ" in the title is brace-protected so that bibliography styles
%%% which downcase titles preserve the language name's capitalization.
@Article{Malik:2012:SLA,
  author =       "Avinash Malik and Zoran Salcic and Christopher Chong
                 and Salman Javed",
  title =        "System-level approach to the design of a smart
                 distributed surveillance system using {SystemJ}",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "77:1--77:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362344",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Distributed surveillance systems represent a class of
                 sensor networks used for object location and tracking,
                 road traffic monitoring, security, and other purposes.
                 They are very complex to describe, design, and run.
                 Because of their sensitivity, they need to be carefully
                 designed and validated. We present a system-level
                 approach to modeling and designing such systems using a
                 new system-level programming language, SystemJ, which
                 enables designers to describe computational and
                 communication parts of such applications in a highly
                 abstract manner. The designed system can be modeled and
                 validated even before deployment and in that way
                 contribute to the overall reliability and
                 trustworthiness of such systems. As an additional tool,
                 the design environment for specification of the
                 surveillance system topology, physical and
                 communication properties, selected sensors and their
                 interconnectivity with the computing resources was
                 developed. This tool enables easy composition of
                 multiple sensors and their respective controllers,
                 capturing changes of configuration of the system and
                 underlying communication, and automatic generation of
                 the formal description of the surveillance system. This
                 description is then used for the generation of
                 executable code and/or the templates for detailed
                 SystemJ application-specific code, as well as for
                 generation of the operator GUI in a surveillance
                 system.",
  acknowledgement = ack-nhfb,
  articleno =    "77",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Yoong:2012:ICC,
  author =       "Li Hsien Yoong and Partha S. Roop and Zoran Salcic",
  title =        "Implementing constrained cyber-physical systems with
                 {IEC 61499}",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "78:1--78:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362345",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cyber-physical systems (CPS) are integrations of
                 computation and control with sensing and actuation of
                 the physical environment. Typically, such systems
                 consist of embedded computers that monitor and control
                 physical processes in a feedback loop. While modern
                 electronic systems are increasingly characterized as
                 CPS, their design and synthesis still rely on
                 traditional methods, which lack systematic and
                 automated techniques for accomplishment. Recently, IEC
                 61499 has been proposed as a standard for designing
                 industrial process-control and measurement systems. It
                 prescribes a component-based approach for developing
                 industrial automation software using function blocks.
                 Executable code can then be automatically generated and
                 simulated from these function blocks. This bodes well
                 for designers of CPS, who are more likely to be experts
                 in specific industrial domains, rather than in computer
                 science. The intuitive graphical nature and automatic
                 code synthesis of IEC 61499 programs will alleviate the
                 programming burden of industrial engineers, while
                 ensuring more reliable software. While software
                 synthesis from IEC 61499 programs is not new, the
                 generation of efficient code from them has been
                 wanting. This has made it difficult for function blocks
                 to be used in software development for
                 resource-constrained embedded controllers commonly
                 employed in CPS. To address this, we present an
                 approach that can generate very efficient code from
                 function block descriptions. Experimental results from
                 a benchmark suite show that our approach produces
                 substantially faster and smaller code compared to
                 existing techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "78",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Subramanian:2012:GOP,
  author =       "Varun Subramanian and Michael Gilberti and Alex Doboli
                 and Dan Pescaru",
  title =        "A goal-oriented programming framework for grid sensor
                 networks with reconfigurable embedded nodes",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "79:1--79:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362346",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cyber-physical systems (CPS) are large, distributed
                 embedded systems integrated with various sensors and
                 actuators. CPS are rapidly emerging as an important
                 computing paradigm in many modern applications.
                 Developing CPS applications is currently challenging
                 due to the sheer complexity of the related
                 functionality as well as the broad set of constraints
                 and unknowns that must be tackled during operation.
                 This article presents a novel high-level programming
                 model and the supporting optimization and middleware
                 routines for executing applications on
                 physically-distributed networks of reconfigurable
                 embedded systems. The proposed model describes the
                 optimization goals, sensing inputs, actuation outputs,
                 events, and constraints of an application, while
                 leaving to the compiler and execution environment the
                 task of optimally implementing the derived
                 functionality. Experimental results discuss the
                 additional performance optimizations enabled by the
                 proposed model, and the timing and power consumption of
                 the middleware routines, and present a temperature
                 monitoring application implemented on a network of
                 reconfigurable, embedded processors.",
  acknowledgement = ack-nhfb,
  articleno =    "79",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Tan:2012:ACF,
  author =       "Rui Tan and Guoliang Xing and Xue Liu and Jianguo Yao
                 and Zhaohui Yuan",
  title =        "Adaptive calibration for fusion-based cyber-physical
                 systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "80:1--80:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362347",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Many Cyber-Physical Systems (CPS) are composed of
                 low-cost devices that are deeply integrated with
                 physical environments. As a result, the performance of
                 a CPS system is inevitably undermined by various
                 physical uncertainties, which include stochastic
                 noises, hardware biases, unpredictable environment
                 changes, and dynamics of the physical process of
                 interest. Traditional solutions to these issues (e.g.,
                 device calibration and collaborative signal processing)
                 work in an open-loop fashion and hence often fail to
                 adapt to the uncertainties after system deployment. In
                 this article, we propose an adaptive system-level
                 calibration approach for a class of CPS systems whose
                 primary objective is to detect events or targets of
                 interest. Through collaborative data fusion, our
                 calibration approach features a feedback control loop
                 that exploits system heterogeneity to mitigate the
                 impact of aforementioned uncertainties on the system
                 performance. In contrast to existing heuristic-based
                 solutions, our control-theoretical calibration
                 algorithm can ensure provable system stability and
                 convergence. We also develop a routing algorithm for
                 fusion-based multihop CPS systems that is robust to
                 communication unreliability and delay. Our approach is
                 evaluated by both experiments on a testbed of Tmotes as
                 well as extensive simulations based on data traces
                 gathered from a real vehicle detection experiment. The
                 results demonstrate that our calibration algorithm
                 enables a CPS system to maintain the optimal sensing
                 performance in the presence of various system and
                 environmental dynamics.",
  acknowledgement = ack-nhfb,
  articleno =    "80",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Nam:2012:MTI,
  author =       "Min-Young Nam and Kyungtae Kang and Rodolfo Pellizzoni
                 and Kyung-Joon Park and Jung-Eun Kim and Lui Sha",
  title =        "Modeling towards incremental early analyzability of
                 networked avionics systems using virtual integration",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "81:1--81:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362348",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "With the advance of hardware technology, more features
                 are incrementally added to already existing networked
                 systems. Avionics has a stronger tendency to use
                 preexisting applications due to its complexity and
                 scale. As resource sharing becomes intense among the
                 network and the computing modules, it has become a
                 difficult task for the system designer to make
                 confident architectural decisions even for incremental
                 changes. Providing a tailored environment to model and
                 analyze incremental changes requires a combination of
                 software tools and hardware support. We have built a
                 virtual integration tool called ASIIST which can
                 provide a worst-case end-to-end latency of data that is
                 sent through a network and the internal bus
                 architecture of the end-systems. Also, we have devised
                 a new real-time switching algorithm which guarantees
                 the worst-case network delay of preexisting network
                 traffic under feasible conditions. With the real-time
                 switch support, ASIIST can provide an early modularized
                 analysis of the end-to-end latency to make
                 architectural design choices and incremental changes
                 easier for the user.",
  acknowledgement = ack-nhfb,
  articleno =    "81",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Pajic:2012:RAE,
  author =       "Miroslav Pajic and Alexander Chernoguzov and Rahul
                 Mangharam",
  title =        "Robust architectures for embedded wireless network
                 control and actuation",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "82:1--82:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362349",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Networked cyber-physical systems are fundamentally
                 constrained by the tight coupling and closed-loop
                 control of physical processes. To address actuation in
                 such closed-loop wireless control systems there is a
                 strong need to rethink the communication architectures
                 and protocols for reliability, coordination, and
                 control. We introduce the Embedded Virtual Machine
                 (EVM), a programming abstraction where controller tasks
                 with their control and timing properties are maintained
                 across physical node boundaries and functionality is
                 capable of migrating to the most competent set of
                 physical controllers. In the context of process and
                 discrete control, an EVM is the distributed runtime
                 system that dynamically selects primary-backup sets of
                 controllers given spatial and temporal constraints of
                 the underlying wireless network. EVM-based algorithms
                 allow network control algorithms to operate seamlessly
                 over less reliable wireless networks with topological
                 changes. They introduce new capabilities such as
                 predictable outcomes during sensor/actuator failure,
                 adaptation to mode changes, and runtime optimization of
                 resource consumption. An automated design flow from
                 Simulink to platform-independent domain-specific
                 languages, and subsequently, to platform-dependent code
                 generation is presented. Through case studies in
                 discrete and process control we demonstrate the
                 capabilities of EVM-based wireless network control
                 systems.",
  acknowledgement = ack-nhfb,
  articleno =    "82",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Lakshmanan:2012:OPM,
  author =       "Karthik Lakshmanan and Dionisio {De Niz} and
                 Ragunathan (Raj) Rajkumar and Gabriel Moreno",
  title =        "Overload provisioning in mixed-criticality
                 cyber-physical systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "83:1--83:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362350",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cyber-physical systems are an emerging class of
                 applications that require tightly coupled interaction
                 between the computational and physical worlds. These
                 systems are typically realized using sensor/actuator
                 interfaces connected with processing backbones. Safety
                 is a primary concern in cyber-physical systems since
                 the actuators directly influence the physical world.
                 However, unexpected or unusual conditions in the
                 physical world can manifest themselves as increased
                 workload demands being offered to the computational
                 infrastructure of a cyber-physical system. Guaranteeing
                 system safety under overload conditions is therefore a
                 prime concern in developing and deploying
                 cyber-physical systems. In this work, we study this
                 problem in the context of a radar surveillance system,
                 where tasks have different levels of criticality or
                 influence on system safety. In the face of overloads,
                 we observe that the desirable property in such systems
                 is that the more critical tasks continue to meet their
                 timing requirements. We capture this mixed-criticality
                 overload requirement using a formal overload-tolerance
                 metric called ductility. Using this overload-tolerance
                 metric, we first develop our solution in the context of
                 uniprocessor systems, where we show that Zero-Slack
                 scheduling (ZS) algorithms can be used to improve the
                 overload behavior in mixed-criticality cyber-physical
                 systems compared to existing fixed-priority scheduling
                 algorithms like Rate-Monotonic Scheduling (RMS) and
                 Criticality-As-Priority-Assignment (CAPA). Leveraging
                 these results, we then develop a criticality-aware task
                 allocation algorithm called Compress-on-Overload
                 Packing (COP) for dealing with multiprocessor
                 cyber-physical systems. Evaluation results show that
                 COP achieves up to five times better ductility than
                 traditional load balancing bin-packing algorithms like
                 Worst-Fit Decreasing (WFD). Finally, we apply ZS and
                 COP to the radar surveillance system to demonstrate the
                 resulting improvement in system overload behavior. Our
                 implementation of the Zero-Slack scheduler is available
                 as a part of the Linux/RK project, which provides
                 resource kernel extensions for Linux.",
  acknowledgement = ack-nhfb,
  articleno =    "83",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Woehrle:2012:CTC,
  author =       "Matthias Woehrle and Kai Lampka and Lothar Thiele",
  title =        "Conformance testing for cyber-physical systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "84:1--84:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362351",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Cyber-Physical Systems (CPS) require a high degree of
                 reliability and robustness. Hence it is important to
                 assert their correctness with respect to
                 extra-functional properties, like power consumption,
                 temperature, etc. In turn the physical quantities may
                 be exploited for assessing system implementations. This
                 article develops a methodology for utilizing
                 measurements of physical quantities for testing the
                 conformance of a running CPS with respect to a formal
                 description of its required behavior allowing to
                 uncover defects. We present foundations and
                 implementations of this approach and demonstrate its
                 usefulness by conformance testing power measurements of
                 a wireless sensor node with a formal model of its power
                 consumption.",
  acknowledgement = ack-nhfb,
  articleno =    "84",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Zhu:2012:OTA,
  author =       "Qi Zhu and Haibo Zeng and Wei Zheng and Marco {Di
                 Natale} and Alberto Sangiovanni-Vincentelli",
  title =        "Optimization of task allocation and priority
                 assignment in hard real-time distributed systems",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "85:1--85:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362352",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The complexity and physical distribution of modern
                 active safety, chassis, and powertrain automotive
                 applications requires the use of distributed
                 architectures. Complex functions designed as networks
                 of function blocks exchanging signal information are
                 deployed onto the physical HW and implemented in a SW
                 architecture consisting of a set of tasks and messages.
                 The typical configuration features priority-based
                 scheduling of tasks and messages and imposes end-to-end
                 deadlines. In this work, we present and compare
                 formulations and procedures for the optimization of the
                 task allocation, the signal to message mapping, and the
                 assignment of priorities to tasks and messages in order
                 to meet end-to-end deadline constraints and minimize
                 latencies. Our formulations leverage worst-case
                 response time analysis within a mixed integer linear
                 optimization framework and are compared for performance
                 against a simulated annealing implementation. The
                 methods are applied for evaluation to an automotive
                 case study of complexity comparable to industrial
                 design problems.",
  acknowledgement = ack-nhfb,
  articleno =    "85",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Cucinotta:2012:ART,
  author =       "Tommaso Cucinotta and Fabio Checconi and Luca Abeni
                 and Luigi Palopoli",
  title =        "Adaptive real-time scheduling for legacy multimedia
                 applications",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "86:1--86:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362353",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Multimedia applications are often executed on standard
                 personal computers. The absence of established
                 standards has hindered the adoption of real-time
                 scheduling solutions in this class of applications.
                 Developers have adopted a wide range of heuristic
                 approaches to achieve an acceptable timing behavior but
                 the result is often unreliable. We propose a mechanism
                 to extend the benefits of real-time scheduling to
                 legacy applications based on the combination of two
                 techniques: (1) a real-time monitor that observes and
                 infers the activation period of the application, and
                 (2) a feedback mechanism that adapts the scheduling
                 parameters to improve its real-time performance.",
  acknowledgement = ack-nhfb,
  articleno =    "86",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Scharfenberger:2012:RIP,
  author =       "Christian Scharfenberger and Samarjit Chakraborty
                 and Georg F{\"a}rber",
  title =        "Robust image processing for an omnidirectional
                 camera-based smart car door",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "87:1--87:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2362354",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Over the last decade, there has been an increasing
                 emphasis on driver-assistance systems for the
                 automotive domain. In this article, we report our work
                 on designing a camera-based surveillance system
                 embedded in a ``smart'' car door. Such a camera is used
                 to monitor the ambient environment outside the car, for
                 instance, the presence of obstacles such as approaching
                 cars or cyclists who might collide with the car door if
                 opened---and automatically control the car door
                 operations. This is an enhancement to the currently
                 available side-view mirrors that the driver/passenger
                 checks before opening the car door. The focus of this
                 article is on fast and robust image processing
                 algorithms specifically targeting such a smart car door
                 system. The requirement is to quickly detect traffic
                 objects of interest from grayscale images captured by
                 omnidirectional cameras. While known algorithms for
                 object extraction from the image processing literature
                 rely on color information and are sensitive to shadows
                 and illumination changes, our proposed algorithms are
                 highly robust, can operate on grayscale images (color
                 images are not available in our setup), and output
                 results in real time. We present a number of
                 experimental results based on image sequences captured
                 from real-life traffic scenarios to demonstrate the
                 applicability of our algorithm.",
  acknowledgement = ack-nhfb,
  articleno =    "87",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Gordon-Ross:2012:CCR,
  author =       "Ann Gordon-Ross and Frank Vahid and Nikil Dutt",
  title =        "Combining code reordering and cache configuration",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "88:1--88:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2399177",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The instruction cache is a popular optimization target
                 due to the cache's high impact on system performance
                 and power and because of the cache's predictable
                 temporal and spatial locality. This article is an
                 in-depth study on the interaction of code reordering (a
                 long-known technique) and cache configuration (a
                 relatively new technique). Experimental results show
                 that code reordering coupled with cache configuration
                 reveals additional energy savings as high as 10--15\%
                 for several benchmarks with reduced cache area as high
                 as 48\%. To exploit these additional benefits, we
                 architect and evaluate several design exploration
                 heuristics for combining these two methods.",
  acknowledgement = ack-nhfb,
  articleno =    "88",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Baiocchi:2012:EDB,
  author =       "Jos{\'e} A. Baiocchi and Bruce R. Childers and Jack W.
                 Davidson and Jason D. Hiser",
  title =        "Enabling dynamic binary translation in embedded
                 systems with scratchpad memory",
  journal =      j-TECS,
  volume =       "11",
  number =       "4",
  pages =        "89:1--89:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2362336.2399178",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Jan 10 17:38:16 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Important challenges for embedded systems can be
                 addressed by dynamic binary translation. A dynamic
                 binary translator stores translated instructions in a
                 software-managed code cache, which is usually large to
                 minimize overhead. This article shows how to use a
                 small scratchpad memory for the code cache. A small
                 code cache may require frequent code evictions and
                 retranslation, which degrade performance. We propose
                 techniques to reduce the number of instructions
                 inserted by the translator and a way to form fragments
                 that minimizes translated code size. With our
                 techniques, a much smaller code cache can hold a
                 program's translated code working set.",
  acknowledgement = ack-nhfb,
  articleno =    "89",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Khalgui:2013:ISI,
  author =       "Mohamed Khalgui and Zhiwu Li",
  title =        "Introduction to the {Special Issue on Modeling and
                 Verification of Discrete Event Systems}",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2406336.2406337",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Wang:2013:DLE,
  author =       "Shouguang Wang and Chengying Wang and Yanping Yu",
  title =        "Design of Liveness-Enforcing Supervisors for {S$^3$PR}
                 Based on Complementary Places",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2406336.2406338",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article, an algorithm is proposed to design
                 liveness-enforcing supervisors for systems of simple
                 sequential processes with resources (S$^3$PR) based on
                 complementary places. Firstly, a mixed integer
                 programming (MIP) based deadlock detection method is
                 used to find unmarked strict minimal siphons from an
                 infinite-capacity net. Next, the finite-capacity net,
                 in which liveness can be enforced, is obtained by
                 adding capacity function to the infinite-capacity net.
                 Finally, complementary-place transformation is used to
                 transform the finite-capacity net into an
                 infinite-capacity net. This article focuses on adding a
                 complementary place to each operation place that is
                 related to unmarked siphons, deals with the deadlock
                 problem from a new view point, and hence advances the
                 deadlock control theory. Compared with the existing
                 methods, the new policy is easier to implement for real
                 industrial systems. More importantly, design of a
                 complementary-place supervisor is very easy. Finally,
                 in some cases, the new policy can obtain a structurally
                 simpler supervisor with more permissive behavior than
                 the existing methods do. A flexible manufacturing
                 systems (FMS) example is used to compare the proposed
                 policy with some other methods.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Chen:2013:CMS,
  author =       {Yufeng Chen and Gaiyun Liu},
  title =        {Computation of Minimal Siphons in {Petri} Nets by
                 Using Binary Decision Diagrams},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {3},
  pages =        {3:1--3:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406339},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {Siphons play an important role in the development of
                 deadlock control methods by using Petri nets. The
                 number of siphons increases exponentially with respect
                 to the size of a Petri net. This article presents a
                 symbolic approach to the computation of minimal siphons
                 in Petri nets by using binary decision diagrams (BDD).
                 The siphons of a Petri net can be found via a set of
                 logic conditions. The logic conditions are symbolically
                 modeled by using Boolean algebras. The operations of
                 Boolean algebras are implemented by BDD that are
                 capable of representing large sets of siphons with
                 small shared data structures. The proposed method first
                 uses BDD to compute all siphons of a Petri net and then
                 a binary relation is designed to extract all minimal
                 siphons. Finally, by using a number of examples, the
                 efficiency of the proposed method is verified through
                 different-sized problems.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Ding:2013:DAV,
  author =       {Zhijun Ding and Changjun Jiang and Mengchu Zhou},
  title =        {Design, Analysis and Verification of Real-Time Systems
                 Based on Time {Petri} Net Refinement},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {4},
  pages =        {4:1--4:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406340},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {A type of refinement operations of time Petri nets is
                 presented for design, analysis and verification of
                 complex real-time systems. First, the behavior
                 preservation is studied under time constraints in a
                 refinement operation, and a sufficient condition for
                 behavior preservation is obtained. Then, the property
                 preservation is considered, and the results indicate
                 that if the refinement operation of time Petri nets
                 satisfies behavior preservation, it can also preserve
                 properties such as boundedness and liveness. Finally,
                 based on the behavior preservation, a reachability
                 decidability algorithm of a refined time Petri net is
                 designed using the reachability trees of its original
                 net and subnet. The research results are illustrated by
                 an example of designing, analyzing and verifying a
                 real-time manufacturing system.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{McInnes:2013:MAT,
  author =       {Allan I. McInnes},
  title =        {Modeling and Analysis of {TinyOS} Sensor Node
                 Firmware: a {CSP} Approach},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {5},
  pages =        {5:1--5:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406341},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {Wireless sensor networks are an increasingly popular
                 application area for embedded systems. Individual
                 sensor nodes within a network are typically
                 resource-constrained, event-driven, and require a high
                 degree of concurrency. This combination of requirements
                 motivated the development of the widely used TinyOS
                 sensor node operating system. The TinyOS concurrency
                 model is a lightweight nonpreemptive system designed to
                 suit the needs of typical sensor network applications.
                 Although the TinyOS concurrency model is easier to
                 reason about than preemptive threads, it can still give
                 rise to undesirable behavior due to unexpected
                 interleavings of related tasks, or unanticipated
                 preemption by interrupt handlers. To aid TinyOS
                 developers in understanding the behavior of their
                 programs we have developed a technique for using the
                 process algebra Communicating Sequential Processes
                 (CSP) to model the interactions between TinyOS
                 components, and between an application and the TinyOS
                 scheduling and preemption mechanisms. Analysis of the
                 resulting models can help TinyOS developers to discover
                 and diagnose concurrency-related errors in their
                 designs that might otherwise go undetected until after
                 the application has been widely deployed. Such analysis
                 is particularly valuable for the TinyOS components that
                 are used as building blocks for a large number of other
                 applications, since a subtle or sporadic error in a
                 widely deployed building block component could be
                 extremely costly to repair.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Godary-Dejean:2013:FVD,
  author =       {Karen Godary-Dejean and David Andreu},
  title =        {Formal Validation of a Deterministic {MAC} Protocol},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {6},
  pages =        {6:1--6:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406342},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {This article deals with the formal validation of
                 STIMAP, a medium access protocol that has been designed
                 to meet the specific requirements of an implantable
                 network-based neuroprosthesis. This article presents
                 the modeling and the validation of its medium access,
                 using model checking on Time Petri Nets. Doing so, we
                 show that existent formal methods and tools are not
                 perfectly suitable for the validation of real systems,
                 especially when some hardware parameters have to be
                 considered. This article then presents how these
                 difficulties have been managed during the modeling and
                 verification phases, and gives the validation results
                 for STIMAP, providing constraints to respect.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Boucheneb:2013:RIS,
  author =       {Hanifa Boucheneb and Kamel Barkaoui},
  title =        {Reducing Interleaving Semantics Redundancy in
                 Reachability Analysis of Time {Petri} Nets},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {7},
  pages =        {7:1--7:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406343},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {The main problem of verification techniques based on
                 exploration of (reachable) state space is the state
                 explosion problem. In timed models, abstract states
                 reached by different interleavings of the same set of
                 transitions are, in general, different and their union
                 is not necessarily an abstract state. To attenuate this
                 state explosion, it would be interesting to reduce the
                 redundancy caused by the interleaving semantics by
                 agglomerating all these abstract states whenever their
                 union is an abstract state. This article considers the
                 time Petri net model and establishes some sufficient
                 conditions that ensure that this union is an abstract
                 state. In addition, it proposes a procedure to compute
                 this union without computing beforehand intermediate
                 abstract states. Finally, it shows how to use this
                 result to improve the reachability analysis.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Zhang:2013:SCE,
  author =       {Zhiming Zhang and Weimin Wu},
  title =        {Sequence Control of Essential Siphons for Deadlock
                 Prevention in {Petri} Nets},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {8},
  pages =        {8:1--8:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406344},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {Deadlock prevention is crucial to the modeling of
                 flexible manufacturing systems. In the Petri net
                 framework, deadlock prevention is often addressed by
                 siphon-based control (SC) policies. Recent research
                 results show that SC methods can avoid full siphon
                 enumeration by using mixed integer programming (MIP) to
                 greatly increase the computational efficiency so that
                 it can be applied in large systems in computable time.
                 Besides, maximally permissive control solutions can be
                 obtained by means of iterative siphon control (ISC)
                 approaches and MIP. Then the remaining problems are
                 redundancy and MIP iterations. Redundant controllers
                 make the closed-loop system more complicated and each
                 MIP iteration increases the total computational time.
                 This article proposes a revised ISC deadlock prevention
                 policy which can achieve better results than the other
                 reported methods in terms of redundancy and MIP
                 iterations while maintaining the maximal
                 permissiveness. Several benchmark examples are provided
                 to illustrate the proposed approach and to be compared
                 with the other reported methods.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Ahmed:2013:HGA,
  author =       {Zakir Hussain Ahmed},
  title =        {A Hybrid Genetic Algorithm for the {Bottleneck
                 Traveling Salesman Problem}},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {9},
  pages =        {9:1--9:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406345},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {The bottleneck traveling salesman problem is to find a
                 Hamiltonian circuit that minimizes the largest cost of
                 any of its arcs in a graph. A simple genetic algorithm
                 (GA) using sequential constructive crossover has been
                 developed to obtain heuristic solution to the problem.
                 The hybrid GA incorporates 2-opt search, another
                 proposed local search and immigration to the simple GA
                 for obtaining better solution. The efficiency of our
                 hybrid GA to the problem against two existing heuristic
                 algorithms has been examined for some symmetric TSPLIB
                 instances. The comparative study shows the
                 effectiveness of our hybrid algorithm. Finally, we
                 present solutions to the problem for asymmetric TSPLIB
                 instances.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Wu:2013:OSL,
  author =       {Naiqi Wu and Mengchu Zhou and Gang Hu},
  title =        {One-Step Look-Ahead Maximally Permissive Deadlock
                 Control of {AMS} by Using {Petri} Nets},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {10},
  pages =        {10:1--10:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406346},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {It is desired that a deadlock control policy for
                 automated manufacturing systems (AMS) is maximally
                 permissive. However, its tractability issue remains
                 open, and this work addresses this important issue. It
                 models AMS with a resource-oriented Petri net (ROPN)
                 and presents a necessary and sufficient condition under
                 which there exists a one-step look-ahead maximally
                 permissive control policy for deadlock avoidance in
                 AMS. It further identifies some conditions under which
                 a one-step look-ahead maximally permissive deadlock
                 control policy exists for a single-capacity system. The
                 conditions can be conveniently examined by using the
                 developed ROPN model.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Huang:2013:TBD,
  author =       {Yi-Sheng Huang and Yen-Liang Pan and Pin-June Su},
  title =        {Transition-Based Deadlock Detection and Recovery
                 Policy for {FMSs} Using Graph Technique},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {11},
  pages =        {11:1--11:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406347},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {A transition-controlled deadlock detection and
                 recovery prevention policy is presented for a subclass
                 of Petri nets used to model flexible manufacturing
                 systems. The subclass is called systems of simple
                 sequential processes with resources (S$^3$PR). The
                 proposed policy is different from the standard deadlock
                 prevention policies. Instead of adding control places,
                 this policy adds a controlled transition to solve a
                 group of deadlocked markings that have the same
                 graph-based property. Finally, the results of our study
                 indicate that the proposed policy appears to be more
                 permissive than those existing ones that add control
                 places.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Nazemzadeh:2013:FMD,
  author =       "Payam Nazemzadeh and Abbas Dideban and Meisam
                 Zareiee",
  title =        "Fault Modeling in Discrete Event Systems Using {Petri}
                 Nets",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2406336.2406348",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "In this article a model-based controller
                 reconfiguration method for fault-tolerant control of
                 discrete event systems has been introduced. In this
                 method, we model the fault conditions for each
                 specified fault as a new model called fault model. The
                 system then consists of three different models called
                 process, specification and fault. The faulty parts of
                 the system are not permitted to do any job and the
                 controller tries to enforce the specifications by other
                 parts of the system. With this method, the controller
                 reconfiguration problem for fault-tolerant control of
                 discrete event systems converts to the problem of
                 synchronizing the process, specification and fault
                 model. We must synthesize a supervisor that can enforce
                 both specifications and faults status. If this
                 supervisor can be determined, we can achieve a
                 fault-tolerant controller. Implementing both
                 specification and fault models in the system, may lead
                 to a large number of forbidden states and constraints
                 and so on a more complicated forbidden states problem
                 must be solved. The application of constraints
                 simplification methods is shown. By the existing
                 methods for offline simplifying of constraints, we can
                 arrive at a simplified fault tolerant controller.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Mhamdi:2013:FMT,
  author =       {Tarek Mhamdi and Osman Hasan and Sofi{\`e}ne Tahar},
  title =        {Formalization of Measure Theory and {Lebesgue}
                 Integration for Probabilistic Analysis in {HOL}},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {13},
  pages =        {13:1--13:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406349},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {Dynamic systems that exhibit probabilistic behavior
                 represent a large class of man-made systems such as
                 communication networks, air traffic control, and other
                 mission-critical systems. Evaluation of quantitative
                 issues like performance and dependability of these
                 systems is of paramount importance. In this paper, we
                 propose a generalized methodology to formally reason
                 about probabilistic systems within a theorem prover. We
                 present a formalization of measure theory in the HOL
                 theorem prover and use it to formalize basic concepts
                 from the theory of probability. We also use the
                 Lebesgue integration to formalize statistical
                 properties of random variables. To illustrate the
                 practical effectiveness of our methodology, we formally
                 prove classical results from the theories of
                 probability and information and use them in a data
                 compression application in HOL.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Khalgui:2013:RRE,
  author =       {Mohamed Khalgui and Olfa Mosbahi and Zhiwu Li},
  title =        {Runtime Reconfigurations of Embedded Controllers},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {14},
  pages =        {14:1--14:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406350},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {The article deals with Reconfigurable Embedded Control
                 Systems following different Component-based
                 Technologies and/or Architecture Description Languages
                 used today in Industry. We define a Control Component
                 as a software unit to support control tasks of the
                 system which is assumed to be a network of components
                 with precedence constraints. We define an agent-based
                 architecture to handle automatic reconfigurations under
                 well-defined conditions by creating, deleting or
                 updating components to bring the whole system into safe
                 and optimal behaviors. To cover all possible
                 reconfiguration forms, we model the agent by nested
                 state machines according to the formalism Net
                 Condition/Event Systems (abbr. NCES) which is an
                 extension of Petri nets. We apply in addition a model
                 checking to verify functional and extra-functional
                 properties according to the temporal logic
                 ``Computation Tree Logic'' (abbr. CTL). The goal is to
                 check the agent's reactivity after any evolution of the
                 environment. Several complex networks can implement the
                 system such that each one is executed at a given time
                 when a corresponding reconfiguration scenario is
                 automatically applied by the agent. To check the
                 correctness of each one of them, we apply in several
                 steps a refinement-based approach that automatically
                 specifies feasible Control Components according to
                 NCES. The model checker SESA is automatically applied
                 in each step to verify deadlock properties of new
                 generated components, and is manually used to verify
                 CTL-based properties according to user requirements.
                 Two Industrial Benchmark Production Systems FESTO and
                 EnAS available in our research laboratory are applied
                 to explain the article's contributions.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Mery:2013:FSM,
  author =       {Dominique M{\'e}ry and Neeraj Kumar Singh},
  title =        {Formal Specification of Medical Systems by Proof-Based
                 Refinement},
  journal =      j-TECS,
  fjournal =     {ACM Transactions on Embedded Computing Systems},
  volume =       {12},
  number =       {1},
  articleno =    {15},
  pages =        {15:1--15:??},
  month =        jan,
  year =         {2013},
  CODEN =        {????},
  ISSN =         {1539-9087 (print), 1558-3465 (electronic)},
  ISSN-L =       {1539-9087},
  DOI =          {http://dx.doi.org/10.1145/2406336.2406351},
  journal-URL =  {http://portal.acm.org/browse_dl.cfm?&idx=J840},
  abstract =     {Formal methods have emerged as an alternative approach
                 to ensuring quality and correctness of highly critical
                 systems, overcoming limitations of traditional
                 validation techniques such as simulation and testing.
                 We propose a refinement-based methodology for complex
                 medical systems design, which possesses all the
                 required key features. A refinement-based combined
                 approach of formal verification, model validation using
                 a model-checker and refinement chart is proposed in
                 this methodology for designing a high-confidence
                 medical device. Furthermore, we show the effectiveness
                 of this methodology for the design of a cardiac
                 pacemaker system.},
  acknowledgement = ack-nhfb,
  bibdate =      {Fri Jan 25 17:38:43 MST 2013},
  bibsource =    {http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib},
}

@Article{Mosbahi:2013:CFM,
  author =       "Olfa Mosbahi",
  title =        "Combining Formal Methods for the Development of
                 Reactive Systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2406336.2406352",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article deals with the use of two verification
                 approaches: theorem proving and model checking. We
                 focus on the Event-B method by using its associated
                 theorem proving tool (Click\_n\_Prove), and on the
                 language TLA$^+$ by using its model checker TLC. By
                 considering the limitation of the Event-B method to
                 invariance properties, we propose to apply the language
                 TLA$^+$ to verify liveness properties on a software
                 behavior. We extend first the expressivity and the
                 semantics of a B model (called temporal B model) to
                 deal with the specification of fairness and eventuality
                 properties. Second, we give transformation rules from a
                 temporal B model into a TLA$^+$ module. We present in
                 particular, our prototype system called B2TLA$^+$,
                 that we have developed to support this transformation;
                 then we can verify these properties thanks to the model
                 checker TLC on finite state systems. For the
                 verification of infinite-state systems, we propose the
                 use of the predicate diagrams. We illustrate our
                 approach on a case study of a parcel sorting system.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Sunder:2013:FVD,
  author =       "Christoph S{\"u}nder and Valeriy Vyatkin and Alois
                 Zoitl",
  title =        "Formal Verification of Downtimeless System Evolution
                 in Embedded Automation Controllers",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "17:1--17:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2406336.2406353",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "This article presents a new formal approach to
                 validation of on-the-fly modification of control
                 software in automation systems. The concept of
                 downtimeless system evolution (DSE) is introduced. The
                 DSE is essentially based on the use of IEC 61499 system
                 architecture and formal modeling and verification of
                 the hardware and software of an automation device. The
                 validation is performed by means of two complementary
                 techniques: analytic calculations and formal
                 verification by model-checking.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@Article{Khalgui:2013:DRA,
  author =       "Mohamed Khalgui",
  title =        "Distributed Reconfigurations of Autonomous {IEC61499}
                 Systems",
  journal =      j-TECS,
  volume =       "12",
  number =       "1",
  pages =        "18:1--18:??",
  month =        jan,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2406336.2406354",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Jan 25 17:38:43 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The article deals with Distributed Multiagent
                 Reconfigurable Embedded Control Systems following the
                 International Industrial Standard IEC61499 in which a
                 Function Block (Abbreviated by FB) is an
                 event-triggered software component owning data and a
                 control system is a network of distributed blocks. We
                 define a multiagent embedded architecture in which a
                 Reconfiguration Agent is affected to each device of the
                 execution environment to apply local reconfigurations,
                 and a Coordination Agent is proposed for coordination
                 between devices in order to guarantee safe and coherent
                 distributed reconfigurations. A Communication Protocol
                 is proposed to handle such coordination by using
                 well-defined Coordination Matrices. A prototype is
                 developed to simulate the whole architecture when
                 faults occur or system's optimizations are applied. We
                 specify Reconfiguration Agents to be modeled by nested
                 state machines, and the Coordination Agent according to
                 the formalism Net Condition/Event Systems (Abbreviated
                 by NCES) which is an extension of Petri nets. To allow
                 correct and coherent distributed reconfigurations, we
                 check all possible interactions between controllers by
                 verifying that whenever a reconfiguration is applied in
                 a device, the Coordination Agent and other concerned
                 devices react as described in user requirements. We
                 propose finally XML-based implementations of both
                 Coordination and Reconfiguration Agents according to
                 the technology IEC61499. The article's contributions
                 are applied to two Benchmark Production Systems
                 available in our research laboratory.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Chen:2013:ISS,
  author          = {Jian-Jia Chen and Maurizio Palesi},
  title           = {Introduction to the special section on
                     {ESTIMedia'12}},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {32:1--32:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435228},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  articleno       = {32},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Nikitakis:2013:NLP,
  author          = {Antonis Nikitakis and Savvas Papaioannou and Ioannis
                     Papaefstathiou},
  title           = {A novel low-power embedded object recognition system
                     working at multi-frames per second},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {33:1--33:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435229},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {One very important challenge in the field of
                     multimedia is the implementation of fast and detailed
                     Object Detection and Recognition systems. In
                     particular, in the current state-of-the-art mobile
                     multimedia systems, it is highly desirable to detect
                     and locate certain objects within a video frame in real
                     time. Although a significant number of Object Detection
                     and Recognition schemes have been developed and
                     implemented, triggering very accurate results, the vast
                     majority of them cannot be applied in state-of-the-art
                     mobile multimedia devices; this is mainly due to the
                     fact that they are highly complex schemes that require
                     a significant amount of processing power, while they
                     are also time consuming and very power hungry. In this
                     article, we present a novel FPGA-based embedded
                     implementation of a very efficient object recognition
                     algorithm called Receptive Field Cooccurrence
                     Histograms Algorithm (RFCH). Our main focus was to
                     increase its performance so as to be able to handle the
                     object recognition task of today's highly sophisticated
                     embedded multimedia systems while keeping its energy
                     consumption at very low levels. Our low-power embedded
                     reconfigurable system is at least 15 times faster than
                     the software implementation on a low-voltage high-end
                     CPU, while consuming at least 60 times less energy. Our
                     novel system is also 88 times more energy efficient
                     than the recently introduced low-power multi-core Intel
                     devices which are optimized for embedded systems. This
                     is, to the best of our knowledge, the first system
                     presented that can execute the complete complex object
                     recognition task at a multi frame per second rate while
                     consuming minimal amounts of energy, making it an ideal
                     candidate for future embedded multimedia systems.},
  acknowledgement = ack-nhfb,
  articleno       = {33},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Zhai:2013:MSA,
  author          = {Jiali Teddy Zhai and Hristo Nikolov and Todor
                     Stefanov},
  title           = {Mapping of streaming applications considering
                     alternative application specifications},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {34:1--34:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435230},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Streaming applications often require a parallel Model
                     of Computation (MoC) to specify their application
                     behavior and to facilitate mapping onto Multi-Processor
                     System-on-Chip (MPSoC) platforms. Various performance
                     requirements and resource budgets of embedded systems
                     ask for an efficient design space exploration (DSE)
                     approach to select the best design from a design space
                     consisting of a large number of design choices.
                     However, existing DSE approaches explore the design
                     space that includes only architecture and mapping
                     alternatives for an initial application specification
                     given by the application designer. In this article, we
                     first show that a design often might not be optimal if
                     alternative specifications of a given application are
                     not taken into account. We further argue that the best
                     alternative specification consists of only independent
                     and load-balanced application tasks. Based on the
                     Polyhedral Process Network (PPN) MoC, we present an
                     approach to analyze and transform an initial PPN to an
                     alternative one that contains only independent
                     processes if possible. Finally, by prototyping
                     real-life applications on both FPGA-based MPSoCs and
                     desktop multi-core platforms, we demonstrate that
                     mapping the alternative application specification
                     results in a large performance gain compared to those
                     approaches, in which alternative application
                     specifications are not taken into account.},
  acknowledgement = ack-nhfb,
  articleno       = {34},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Geuns:2013:SST,
  author          = {Stefan J. Geuns and Joost P. H. M. Hausmans and Marco
                     J. G. Bekooij},
  title           = {Sequential specification of time-aware stream
                     processing applications},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {35:1--35:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435231},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Automatic parallelization of Nested Loop Programs
                     (NLPs) is an attractive method to create embedded
                     real-time stream processing applications for multi-core
                     systems. However, the description and parallelization
                     of applications with a time dependent functional
                     behavior has not been considered in NLPs. In such a
                     description, semantic information about time dependent
                     behavior must be made available for the compiler, such
                     that an optimized time independent implementation can
                     be generated automatically. This article introduces
                     language constructs with temporal semantics to NLPs.
                     Using these language constructs, time dependent
                     applications can be specified and a corresponding
                     data-driven implementation can be generated for use on
                     a multi-core system. Despite that these time-aware
                     language constructs can be data-dependent, the
                     application remains functionally deterministic.
                     Pipelining is exploited to increase the throughput of
                     an application. The media access control (MAC) protocol
                     of an IEEE 802.11p WLAN transceiver is used to
                     illustrate the relevance and applicability of the
                     introduced concepts.},
  acknowledgement = ack-nhfb,
  articleno       = {35},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Lee:2013:LAB,
  author          = {Daeyoung Lee and Hyunok Oh},
  title           = {A lifetime aware buffer assignment method for
                     streaming applications on {DRAM\slash PRAM} hybrid
                     memory},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {36:1--36:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435232},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {This article proposes a lifetime aware buffer
                     assignment method for streaming applications like
                     multimedia specified in a synchronous dataflow (SDF)
                     graph on a DRAM/PRAM hybrid memory in which the
                     endurance of PRAM is limited. We determine whether
                     buffers are assigned to DRAM or PRAM to minimize the
                     writing frequency of PRAM. To solve the problems, we
                     formulate them using Answer Set Programming.
                     Experimental results show that the proposed approach
                     increases the PRAM lifetime by 63\% compared with no
                     optimization, and shows the tradeoff between PRAM and
                     DRAM size to guarantee a lifetime constraint.},
  acknowledgement = ack-nhfb,
  articleno       = {36},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Chung:2013:EUE,
  author          = {Yi-Fan Chung and Yin-Tsung Lo and Chung-Ta King},
  title           = {Enhancing user experiences by exploiting energy and
                     launch delay trade-off of mobile multimedia
                     applications},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {37:1--37:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435233},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Launch delay has been an important factor affecting
                     users' experiences in mobile multimedia applications.
                     To launch applications quickly, modern mobile systems
                     such as Android usually keep inactive applications in
                     the background and manage them through an LRU-based
                     activity stack. Whenever the user wants to run and
                     interact with a background application again, that
                     application can be switched back into the foreground
                     quickly from the activity stack without delay in
                     initializing the applications anew. Since background
                     multimedia applications often continuously consume the
                     battery power of the smart phone, the challenge is to
                     effect a balance between application launch delay and
                     battery lifetime. In this article, we propose
                     innovative application management strategies that
                     terminate ``unbeneficial'' background applications to
                     save energy and pre-launch ``beneficial'' applications
                     to improve the application launch delay. The proposed
                     strategies are evaluated through a trace-driven
                     simulation and a real experiment. The results show that
                     the average application launch delay can be reduced by
                     15\% while the average battery lifetime is increased by
                     18\%.},
  acknowledgement = ack-nhfb,
  articleno       = {37},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{DeSutter:2013:ISS,
  author          = {Bjorn {De Sutter} and Jan Vitek},
  title           = {Introduction to the special section on {LCTES'11}},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {38:1--38:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435234},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  articleno       = {38},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Berthier:2013:SPD,
  author          = {Nicolas Berthier and Florence Maraninchi and Laurent
                     Mounier},
  title           = {Synchronous programming of device drivers for global
                     resource control in embedded operating systems},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {39:1--39:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435235},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {In embedded systems, controlling a shared resource
                     like a bus, or improving a property like power
                     consumption, may be hard to achieve when programming
                     device drivers individually. In this article, we
                     propose a global resource control approach, based on a
                     centralized view of the devices' states. The solution
                     we propose operates on the hardware/software interface.
                     It involves a simple adaptation of the application
                     level, to communicate with the hardware via a control
                     layer. The control layer itself is built from a set of
                     simple automata: the device drivers, whose states
                     correspond to functional or power consumption modes,
                     and a controller to enforce global properties. All
                     these automata are programmed using a synchronous
                     language, and compiled into a single piece of C code.
                     We take as example the node of a sensor network. We
                     explain the approach in details, demonstrate its use
                     and benefits with an event-driven or multithreading
                     operating system, and draw guidelines for its use in
                     other contexts.},
  acknowledgement = ack-nhfb,
  articleno       = {39},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Cullmann:2013:CPA,
  author          = {Christoph Cullmann},
  title           = {Cache persistence analysis: Theory and practice},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {40:1--40:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435236},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {To compute a worst-case execution time (WCET) estimate
                     for a program, the architectural effects of the
                     underlying hardware must be modeled. For modern
                     processors this results in the need for a cache and
                     pipeline analysis. The timing-relevant result of the
                     cache analysis is the categorization of the accesses to
                     cached memory. Categorizations that are obtainable by
                     the well-known must and may cache analysis [Ferdinand
                     1997] are always-hit, always-miss and not-classified.
                     The cache persistence analysis tries to provide
                     additional information for the not-classified case to
                     limit the number of misses. There exists a cache
                     persistence analysis by Ferdinand and Wilhelm based on
                     abstract interpretation computing these
                     classifications. In this article, we present a
                     correctness issue with this analysis. To fix this
                     issue, we propose two new abstract interpretation based
                     persistence analyses and show their safety. One is
                     based on the known may analysis and a second one on the
                     concept of conflict counting. For fully timing
                     compositional architectures [Wilhelm et al. 2009] the
                     persistence information is straightforward to use. We
                     will apply the concepts of persistence analysis for the
                     first time to state-of-the-art architectures that
                     exhibit both timing anomalies and domino effects. Such
                     architectures do not allow the analyzer to quantify the
                     costs of a single cache hit or miss in isolation. To
                     make the usage of the persistence information feasible,
                     we integrate the presented novel persistence analyses
                     together with a novel path analysis approach into the
                     industrially used WCET analyzer aiT.},
  acknowledgement = ack-nhfb,
  articleno       = {40},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@article{Sifakis:2013:ISS,
  author          = {Joseph Sifakis and Lothar Thiele and Reinhard
                     Wilhelm},
  title           = {Introduction to the special section on rigorous
                     embedded systems design},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {41:1--41:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435237},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  acknowledgement = ack-nhfb,
  articleno       = {41},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Reineke:2013:SCR,
  author =       "Jan Reineke and Daniel Grund",
  title =        "Sensitivity of cache replacement policies",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "42:1--42:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435227.2435238",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The sensitivity of a cache replacement policy
                 expresses to what extent the execution history may
                 influence the number of cache hits and misses during
                 program execution. We present an algorithm to compute
                 the sensitivity of a replacement policy. We have
                 implemented this algorithm in a tool called Relacs
                 that can handle a large class of replacement policies
                 including LRU, FIFO, PLRU, and MRU. Sensitivity
                 properties obtained with Relacs demonstrate that the
                 execution history can have a strong impact on the
                 number of cache hits and misses if FIFO, PLRU, or MRU
                 is used. A simple model of execution time is used to
                 evaluate the impact of cache sensitivity on measured
                 execution times. The model shows that measured
                 execution times may strongly underestimate the
                 worst-case execution time for FIFO, PLRU, and MRU.",
  acknowledgement = ack-nhfb,
  articleno =    "42",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}

@article{Jeong:2013:RRM,
  author          = {Jinkyu Jeong and Hwanju Kim and Jeaho Hwang and
                     Joonwon Lee and Seungryoul Maeng},
  title           = {Rigorous rental memory management for embedded
                     systems},
  journal         = j-TECS,
  volume          = {12},
  number          = {1s},
  pages           = {43:1--43:??},
  month           = mar,
  year            = {2013},
  coden           = {????},
  doi             = {http://dx.doi.org/10.1145/2435227.2435239},
  issn            = {1539-9087 (print), 1558-3465 (electronic)},
  issn-l          = {1539-9087},
  bibdate         = {Tue Mar 19 07:54:21 MDT 2013},
  bibsource       = {http://portal.acm.org/;
                     http://www.math.utah.edu/pub/tex/bib/tecs.bib},
  abstract        = {Memory reservation in embedded systems is a prevalent
                     approach to provide a physically contiguous memory
                     region to its integrated devices, such as a camera
                     device and a video decoder. Inefficiency of the memory
                     reservation becomes a more significant problem in
                     emerging embedded systems, such as smartphones and
                     smart TVs. Many ways of using these systems increase
                     the idle time of their integrated devices, and
                     eventually decrease the utilization of their reserved
                     memory. In this article, we propose a scheme to
                     minimize the memory inefficiency caused by the memory
                     reservation. The memory space reserved for a device can
                     be rented for other purposes when the device is not
                     active. For this scheme to be viable, latencies
                     associated with reallocating the memory space should be
                     minimal. Volatile pages are good candidates for such
                     page reallocation since they can be reclaimed
                     immediately as they are needed by the original device.
                     We also provide two optimization techniques,
                     lazy-migration and adaptive-activation. The former
                     increases the lowered utilization of the rental memory
                     by our volatile page allocations, and the latter saves
                     active pages in the rental memory during the
                     reallocation. We implemented our scheme on a smartphone
                     development board with the Android Linux kernel. Our
                     prototype has shown that the time for the return
                     operation is less than 0.77 seconds in the tested
                     cases. We believe that this time is acceptable to
                     end-users in terms of transparency since the time can
                     be hidden in application initialization time. The
                     rental memory also brings throughput increases ranging
                     from 2\% to 200\% based on the available memory and the
                     applications' memory intensiveness.},
  acknowledgement = ack-nhfb,
  articleno       = {43},
  fjournal        = {ACM Transactions on Embedded Computing Systems},
  journal-url     = {http://portal.acm.org/browse_dl.cfm?&idx=J840},
}

@Article{Vasilikos:2013:HSA,
  author =       "Vasileios Vasilikos and Georgios Smaragdos and
                 Christos Strydis and Ioannis Sourdis",
  title =        "Heuristic search for adaptive, defect-tolerant
                 multiprocessor arrays",
  journal =      j-TECS,
  volume =       "12",
  number =       "1s",
  pages =        "44:1--44:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435227.2435240",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Tue Mar 19 07:54:21 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/t