%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.11",
%%%     date            = "27 May 2014",
%%%     time            = "16:55:08 MDT",
%%%     filename        = "jdiq.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "06025 2459 13139 125726",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "Journal of Data and Information Quality
%%%                       (JDIQ); bibliography",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        the ACM Journal of Data and Information
%%%                        Quality (JDIQ) (CODEN ????, ISSN 1936-1955),
%%%                        covering all journal issues from 2009 --
%%%                        date.
%%%
%%%                        At version 1.11, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2009 (  17)    2011 (   8)    2013 (   8)
%%%                             2010 (   6)    2012 (  15)    2014 (   4)
%%%
%%%                             Article:         58
%%%
%%%                             Total entries:   58
%%%
%%%                        The journal table of contents page is at:
%%%
%%%                            http://www.acm.org/jdiq/
%%%                            http://portal.acm.org/browse_dl.cfm?idx=J1191
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        The initial draft was extracted from the ACM
%%%                        Web pages.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
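%%%
%%%                        As an illustration drawn from the entries
%%%                        below, the tag ``Madnick:2009:OFD'' combines
%%%                        the first author's family name (Madnick),
%%%                        the publication year (2009), and the
%%%                        3-letter condensation OFD formed from the
%%%                        title words ``Overview'', ``Framework'',
%%%                        and ``Data''.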
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume.''
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility."
%%%     }
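%%%
%%% As a worked example of the checksum field above: the value
%%% "06025 2459 13139 125726" is read as the CRC-16 checksum (06025)
%%% followed by the counts of lines (2459), words (13139), and
%%% characters (125726) that the standard UNIX wc utility would
%%% report for version 1.11 of this file.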
%%% ====================================================================

@Preamble{"\input bibnames.sty" #
    "\def \TM {${}^{\sc TM}$}"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:

@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:

@String{j-JDIQ                  = "Journal of Data and Information
                                  Quality (JDIQ)"}

%%% ====================================================================
%%% Bibliography entries:

@Article{Madnick:2009:EII,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editorial for the Inaugural Issue of the {ACM Journal
                 of Data and Information Quality (JDIQ)}",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Madnick:2009:OFD,
  author =       "Stuart E. Madnick and Richard Y. Wang and Yang W. Lee
                 and Hongwei Zhu",
  title =        "Overview and Framework for Data and Information
                 Quality Research",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1515693.1516680",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Awareness of data and information quality issues has
                 grown rapidly in light of the critical role played by
                 the quality of information in our data-intensive,
                 knowledge-based economy. Research in the past two
                 decades has produced a large body of data quality
                 knowledge and has expanded our ability to solve many
                 data and information quality problems. In this article,
                 we present an overview of the evolution and current
                 landscape of data and information quality research. We
                 introduce a framework to characterize the research
                 along two dimensions: topics and methods.
                 Representative papers are cited for purposes of
                 illustrating the issues addressed and the methods used.
                 We also identify and discuss challenges to be addressed
                 in future research.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Li:2009:BAE,
  author =       "Xiao-Bai Li",
  title =        "A {Bayesian} Approach for Estimating and Replacing
                 Missing Categorical Data",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1515693.1515695",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "We propose a new approach for estimating and replacing
                 missing categorical data. With this approach, the
                 posterior probabilities of a missing attribute value
                 belonging to a certain category are estimated using the
                 simple Bayes method. Two alternative methods for
                 replacing the missing value are proposed: The first
                 replaces the missing value with the value having the
                 estimated maximum probability; the second uses a value
                 that is selected with probability proportional to the
                 estimated posterior distribution. The effectiveness of
                 the proposed approach is evaluated based on some
                 important data quality measures for data warehousing
                 and data mining. The results of the experimental study
                 demonstrate the effectiveness of the proposed
                 approach.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Weber:2009:OSD,
  author =       "Kristin Weber and Boris Otto and Hubert {\"O}sterle",
  title =        "One Size Does Not Fit All---{A} Contingency Approach
                 to Data Governance",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1515693.1515696",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Enterprises need Data Quality Management (DQM) to
                 respond to strategic and operational challenges
                 demanding high-quality corporate data. Hitherto,
                 companies have mostly assigned accountabilities for DQM
                 to Information Technology (IT) departments. They have
                 thereby neglected the organizational issues critical to
                 successful DQM. With data governance, however,
                 companies may implement corporate-wide accountabilities
                 for DQM that encompass professionals from business and
                 IT departments. This research aims at starting a
                 scientific discussion on data governance by
                 transferring concepts from IT governance and
                 organizational theory to the previously largely ignored
                 field of data governance. The article presents the
                 first results of a community action research project on
                 data governance comprising six international companies
                 from various industries. It outlines a data governance
                 model that consists of three components (data quality
                 roles, decision areas, and responsibilities), which
                 together form a responsibility assignment matrix. The
                 data governance model documents data quality roles and
                 their type of interaction with DQM activities. In
                 addition, the article describes a data governance
                 contingency model and demonstrates the influence of
                 performance strategy, diversification breadth,
                 organization structure, competitive strategy, degree of
                 process harmonization, degree of market regulation, and
                 decision-making style on data governance. Based on
                 these findings, companies can structure their specific
                 data governance model.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Heinrich:2009:PDM,
  author =       "B. Heinrich and M. Klier and M. Kaiser",
  title =        "A Procedure to Develop Metrics for Currency and its
                 Application in {CRM}",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jun,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1515693.1515697",
  ISSN =         "1936-1955",
  bibdate =      "Fri Sep 18 15:11:35 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Due to the importance of using up-to-date data in
                 information systems, this article analyzes how the
                 data-quality dimension currency can be quantified.
                 Based on several requirements (e.g., normalization and
                 interpretability) and a literature review, we design a
                 procedure to develop probability-based metrics for
                 currency which can be adjusted to the specific
                 characteristics of data attribute values. We evaluate
                 the presented procedure with regard to the requirements
                 and illustrate the applicability as well as its
                 practical benefit. In cooperation with a major German
                 mobile services provider, the procedure was applied in
                 the field of campaign management in order to improve
                 both success rates and profits.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Madnick:2009:ELS,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editorial Letter for the Special Issue on Data Quality
                 in Databases and Information Systems",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "6:1--6:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1577840.1577841",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Naumann:2009:GES,
  author =       "Felix Naumann and Louiqa Raschid",
  title =        "Guest Editorial for the Special Issue on Data Quality
                 in Databases",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "7:1--7:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1577840.1577842",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Dash:2009:MLN,
  author =       "Manoranjan Dash and Ayush Singhania",
  title =        "Mining in Large Noisy Domains",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "8:1--8:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1577840.1577843",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In this article we address the issue of how to mine
                 efficiently in large and noisy data. We propose an
                 efficient sampling algorithm ({\em Concise\/}) as a
                 solution for large and noisy data. Concise is far
                 superior to Simple Random Sampling ({\em SRS\/})
                 in selecting a representative sample. Particularly when
                 the data is very large and noisy, Concise achieves the
                 maximum gain over SRS. The comparison is in terms of
                 their impact on subsequent data mining tasks,
                 specifically, classification, clustering, and
                 association rule mining. We compared Concise with a few
                 existing noise removal algorithms followed by SRS.
                 Although the accuracy of mining results is similar,
                 Concise spends very little time compared to the
                 existing algorithms because Concise has linear time
                 complexity.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "association rule mining; classification; clustering;
                 data mining; Information filtering; sampling; selection
                 process",
}

@Article{Moustakides:2009:OSR,
  author =       "George V. Moustakides and Vassilios S. Verykios",
  title =        "Optimal Stopping: {A} Record-Linkage Approach",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "9:1--9:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1577840.1577844",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Record-linkage is the process of identifying whether
                 two separate records refer to the same real-world
                 entity when some elements of the record's identifying
                 information (attributes) agree and others disagree.
                 Existing record-linkage decision methodologies use the
                 outcomes from the comparisons of the whole set of
                 attributes. Here, we propose an alternative scheme that
                 assesses the attributes sequentially, allowing for a
                 decision to be made at any attribute's comparison stage,
                 and thus before exhausting all available attributes.
                 The scheme we develop is optimum in that it minimizes a
                 well-defined average cost criterion while the
                 corresponding optimum solution can be easily mapped
                 into a decision tree to facilitate the record-linkage
                 decision process. Experiments performed on real
                 datasets indicate the superiority of our
                 methodology compared to existing approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "duplicate detection; optimal stopping;
                 Record-linkage",
}

@Article{Klein:2009:RDQ,
  author =       "A. Klein and W. Lehner",
  title =        "Representing Data Quality in Sensor Data Streaming
                 Environments",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "10:1--10:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1577840.1577845",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Sensors in smart-item environments capture data about
                 product conditions and usage to support business
                 decisions as well as production automation processes. A
                 challenging issue in this application area is the
                 restricted quality of sensor data due to limited sensor
                 precision and sensor failures. Moreover, data stream
                 processing to meet resource constraints in streaming
                 environments introduces additional noise and decreases
                 the data quality. In order to avoid wrong business
                 decisions due to dirty data, quality characteristics
                 have to be captured, processed, and provided to the
                 respective business task. However, the issue of how to
                 efficiently provide applications with information about
                 data quality is still an open research problem.\par

                 In this article, we address this problem by presenting
                 a flexible model for the propagation and processing of
                 data quality. The comprehensive analysis of common data
                 stream processing operators and their impact on data
                 quality allows a fruitful data evaluation and
                 diminishes incorrect business decisions. Further, we
                 propose the data quality model control to adapt the
                 data quality granularity to the data stream
                 interestingness.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "data quality; Data stream processing; smart items",
}

@Article{Embury:2009:IDS,
  author =       "Suzanne M. Embury and Paolo Missier and Sandra Sampaio
                 and R. Mark Greenwood and Alun D. Preece",
  title =        "Incorporating Domain-Specific Information Quality
                 Constraints into Database Queries",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "11:1--11:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1577840.1577846",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "The range of information now available in queryable
                 repositories opens up a host of possibilities for new
                 and valuable forms of data analysis. Database query
                 languages such as SQL and XQuery offer a concise and
                 high-level means by which such analyses can be
                 implemented, facilitating the extraction of relevant
                 data subsets into either generic or bespoke data
                 analysis environments. Unfortunately, the quality of
                 data in these repositories is often highly variable.
                 The data is still useful, but only if the consumer is
                 aware of the data quality problems and can work around
                 them. Standard query languages offer little support for
                 this aspect of data management. In principle, however,
                 it should be possible to embed constraints describing
                 the consumer's data quality requirements into the query
                 directly, so that the query evaluator can take over
                 responsibility for enforcing them during query
                 processing.\par

                 Most previous attempts to incorporate information
                 quality constraints into database queries have been
                 based around a small number of highly generic quality
                 measures, which are defined and computed by the
                 information provider. This is a useful approach in some
                 application areas but, in practice, quality criteria
                 are more commonly determined by the user of the
                 information, not by the provider. In this article, we
                 explore an approach to incorporating quality
                 constraints into database queries where the definition
                 of quality is set by the user and not the provider of
                 the information. Our approach is based around the
                 concept of a {\em quality view}, a configurable quality
                 assessment component into which domain-specific notions
                 of quality can be embedded. We examine how quality
                 views can be incorporated into XQuery, and draw from
                 this the language features that are required in general
                 to embed quality views into any query language. We also
                 propose some syntactic sugar on top of XQuery to
                 simplify the process of querying with quality
                 constraints.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "database query languages; Information quality; views;
                 XQuery",
}

@Article{Madnick:2009:CPS,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Call for Papers Special Issue on Healthcare
                 Information Quality: the Challenges and Opportunities
                 in Healthcare Systems and Services",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "2",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1577840.1577847",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:40 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Madnick:2009:ECW,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editors' Comments: Where the {JDIQ} Articles Come
                 From: Incubating Research in an Emerging Field",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1659225.1659226",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Sessions:2009:TMD,
  author =       "V. Sessions and M. Valtorta",
  title =        "Towards a Method for Data Accuracy Assessment
                 Utilizing a {Bayesian} Network Learning Algorithm",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1659225.1659227",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "This research develops a data quality algorithm
                 entitled the Accuracy Assessment Algorithm (AAA). This
                 is an extension of research in developing an
                 enhancement to a Bayesian Network (BN) learning
                 algorithm called the Data Quality (DQ) algorithm. This
                 new algorithm is concerned with estimating the accuracy
                 levels of a dataset by assessing the quality of the
                 data with no prior knowledge of the dataset. The AAA
                 and associated metrics were tested using two canonical
                 BNs and one large-scale medical network. The article
                 presents the results regarding the efficacy of the
                 algorithm and the implications for future research and
                 practice.",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "accuracy levels; Bayesian networks; data quality
                 assessment; PC algorithm",
}

@Article{Even:2009:DAD,
  author =       "Adir Even and G. Shankaranarayanan",
  title =        "Dual Assessment of Data Quality in Customer
                 Databases",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1659225.1659228",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Quantitative assessment of data quality is critical
                 for identifying the presence of data defects and the
                 extent of the damage due to these defects. Quantitative
                 assessment can help define realistic quality
                 improvement targets, track progress, evaluate the
                 impacts of different solutions, and prioritize
                 improvement efforts accordingly. This study describes a
                 methodology for quantitatively assessing both impartial
                 {\em and\/} contextual data quality in large datasets.
                 Impartial assessment measures the extent to which a
                 dataset is defective, independent of the context in
                 which that dataset is used. Contextual assessment, as
                 defined in this study, measures the extent to which the
                 presence of defects reduces a dataset's utility, the
                 benefits gained by using that dataset in a specific
                 context. The dual assessment methodology is
                 demonstrated in the context of Customer Relationship
                 Management (CRM), using large data samples from
                 real-world datasets. The results from comparing the two
                 assessments offer important insights for directing
                 quality maintenance efforts and prioritizing quality
                 improvement solutions for this dataset. The study
                 describes the steps and the computation involved in the
                 dual-assessment methodology and discusses the
                 implications for applying the methodology in other
                 business contexts and data environments.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "CRM; customer relationship management; databases; Data
                 quality; information value; total data quality
                 management",
}

@Article{Fisher:2009:AMP,
  author =       "Craig W. Fisher and Eitel J. M. Laur{\'\i}a and Carolyn C.
                 Matheus",
  title =        "An Accuracy Metric: Percentages, Randomness, and
                 Probabilities",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "16:1--16:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1659225.1659229",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Practitioners and researchers regularly refer to error
                 rates or accuracy percentages of databases. The former
                 is the number of cells in error divided by the total
                 number of cells; the latter is the number of correct
                 cells divided by the total number of cells. However,
                 databases may have similar error rates (or accuracy
                 percentages) but differ drastically in the complexity
                 of their accuracy problems. A simple percent does not
                 provide information as to whether the errors are
                 systematic or randomly distributed throughout the
                 database. We expand the accuracy metric to include a
                 randomness measure and include a probability
                 distribution value. The proposed randomness check is
                 based on the Lempel--Ziv (LZ) complexity measure.
                 Through two simulation studies we show that the LZ
                 complexity measure can clearly differentiate as to
                 whether the errors are random or systematic. This
                 determination is a significant first step and is a
                 major departure from the percentage-alone technique.
                 Once it is determined that the errors are random, a
                 probability distribution, Poisson, is used to help
                 address various managerial questions.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "complexity; Data and information quality; randomness",
}

@Article{Ababneh:2009:CSE,
  author =       "Sufyan Ababneh and Rashid Ansari and Ashfaq Khokhar",
  title =        "Compensated Signature Embedding for Multimedia Content
                 Authentication",
  journal =      j-JDIQ,
  volume =       "1",
  number =       "3",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2009",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1659225.1659230",
  ISSN =         "1936-1955",
  bibdate =      "Wed Mar 17 14:47:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "One of the main goals of digital content
                 authentication and preservation techniques is to
                 guarantee the originality and quality of the
                 information. In this article, robust watermarking is
                 used to embed content-based fragile signatures in
                 multimedia signals to achieve efficient authentication
                 without requiring any third-party reference or side
                 information. To overcome the signature alteration
                 caused by the embedding perturbation and other possible
                 encoding operations, a closed-form compensation
                 technique is proposed for ensuring signature
                 consistency by employing a Lagrangian-based approach. A
                 minimum distortion criterion is used to ensure signal
                 quality. The effectiveness of the proposed approach is
                 investigated with simulations of examples of image
                 authentication in which signatures are designed to
                 reveal tamper localization. Results using quantitative
                 performance criteria show successful authentication
                 over a range of robustness in embedding watermarks
                 using both QIM-DM and spread-spectrum techniques. A
                 comparison with two iterative compensation schemes is
                 also presented.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "compensated signature embedding; Content
                 authentication; watermarking",
}

@Article{Madnick:2010:ECA,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "{Editors}' Comments: {ACM Journal of Data and
                 Information Quality (JDIQ)} is alive and well!",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1805286.1805287",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Tremblay:2010:UDM,
  author =       "Monica Chiarini Tremblay and Kaushik Dutta and Debra
                 Vandermeer",
  title =        "Using Data Mining Techniques to Discover Bias Patterns
                 in Missing Data",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1805286.1805288",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In today's data-rich environment, decision makers draw
                 conclusions from data repositories that may contain
                 data quality problems. In this context, missing data is
                 an important and known problem, since it can seriously
                 affect the accuracy of conclusions drawn. Researchers
                 have described several approaches for dealing with
                 missing data, primarily attempting to infer values or
                 estimate the impact of missing data on conclusions.
                 However, few have considered approaches to characterize
                 patterns of bias in missing data, that is, to determine
                 the specific attributes that predict the missingness of
                 data values. Knowledge of the specific systematic bias
                 patterns in the incidence of missing data can help
                 analysts more accurately assess the quality of
                 conclusions drawn from data sets with missing data.
                 This research proposes a methodology to combine a
                 number of Knowledge Discovery and Data Mining
                 techniques, including association rule mining, to
                 discover patterns in related attribute values that help
                 characterize these bias patterns. We demonstrate the
                 efficacy of our proposed approach by applying it on a
                 demo census dataset seeded with biased missing data.
                 The experimental results show that our approach was
                 able to find seeded biases and filter out most seeded
                 noise.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "Data quality; missing data; pattern discovery",
}

@Article{Jensen:2010:JCI,
  author =       "Matthew L. Jensen and Judee K. Burgoon and Jay F.
                 {Nunamaker, Jr.}",
  title =        "Judging the Credibility of Information Gathered from
                 Face-to-Face Interactions",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1805286.1805289",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "One of the most pernicious threats to information
                 quality comes through perpetration of deception by
                 information suppliers. Deception undermines many
                 critical dimensions of information quality, such as
                 accuracy, completeness, and believability. Despite this
                 threat, information gatherers are ill equipped to
                 assess the credibility of information suppliers. This
                 work presents a prototype system that examines messages
                 gathered during direct, face-to-face information
                 gathering. The system unobtrusively identifies kinesic
                 and linguistic features that may indicate deception in
                 information suppliers' messages. System use was found
                 to significantly improve assessment ability in
                 between-subjects and within-subjects tests. The
                 improved ability to accurately assess credibility
                 during face-to-face interactions should yield higher
                 information quality.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "Credibility assessment; deception detection;
                 decision-aids; human-computer interaction; information
                 veracity; kinesics; linguistics",
}

@Article{Meda:2010:DDF,
  author =       "Hema S. Meda and Anup Kumar Sen and Amitava Bagchi",
  title =        "On Detecting Data Flow Errors in Workflows",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1805286.1805290",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "When designing a business workflow, it is customary
                 practice to create the control flow structure first and
                 to ensure its correctness. Information about the flow
                 of data is introduced subsequently into the workflow
                 and its correctness is independently verified. Improper
                 specification of data requirements of tasks and XOR
                 splits can cause problems such as wrong branching at
                 XOR splits and the failure of tasks to execute. Here we
                 present a graph traversal algorithm called GTforDF for
                 detecting data flow errors in both nested and
                 unstructured workflows, and illustrate its operation on
                 realistic examples. Two of these have interconnected
                 loops and are free of control flow errors, and the
                 third one is an unstructured loop-free workflow. Our
                 approach extends and generalizes data flow verification
                 methods that have been recently proposed. It also makes
                 use of the concept of corresponding pairs lately
                 introduced in control flow verification. It thus has
                 the potential for development into a unified
                 algorithmic procedure for the concurrent detection of
                 control flow and data flow errors. The correctness of
                 the algorithm has been proved theoretically. It has
                 also been tested experimentally on many examples.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "Corresponding pair; Data flow errors; Workflow
                 management",
}

@Article{Magnani:2010:SUM,
  author =       "Matteo Magnani and Danilo Montesi",
  title =        "A Survey on Uncertainty Management in Data
                 Integration",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1805286.1805291",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In the last few years, uncertainty management has come
                 to be recognized as a fundamental aspect of data
                 integration. It is now accepted that it may not be
                 possible to remove uncertainty generated during data
                 integration processes and that uncertainty in itself
                 may represent a source of relevant information. Several
                 issues, such as the aggregation of uncertain mappings
                 and the querying of uncertain mediated schemata, have
                 been addressed by applying well-known uncertainty
                 management theories. However, several problems lie
                 unresolved. This article sketches an initial picture of
                 this highly active research area; it details existing
                 works in the light of a homogeneous framework, and
                 identifies and discusses the leading issues awaiting
                 solutions.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
  keywords =     "Data integration; uncertainty",
}

@Article{Talburt:2010:CPS,
  author =       "John R. Talburt and Stuart E. Madnick and Yang W.
                 Lee",
  title =        "Call for Papers: Special Issue on Entity Resolution",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jul,
  year =         "2010",
  CODEN =        "????",
  DOI =          "http://doi.acm.org/10.1145/1805286.1805292",
  ISSN =         "1936-1955",
  bibdate =      "Tue Sep 7 08:41:54 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Madnick:2011:ESN,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editorial: In Search of Novel Ideas and Solutions with
                 a Broader Context of Data Quality in Mind",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1891879.1891880",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Blake:2011:EID,
  author =       "Roger Blake and Paul Mangiameli",
  title =        "The Effects and Interactions of Data Quality and
                 Problem Complexity on Classification",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1891879.1891881",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Gelman:2011:GGA,
  author =       "Irit Askira Gelman",
  title =        "{GIGO} or not {GIGO}: The Accuracy of Multi-Criteria
                 Satisficing Decisions",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1891879.1891882",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Fan:2011:GBN,
  author =       "Xiaoming Fan and Jianyong Wang and Xu Pu and Lizhu
                 Zhou and Bing Lv",
  title =        "On Graph-Based Name Disambiguation",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1891879.1891883",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Ngugi:2011:TBI,
  author =       "Benjamin Ngugi and Beverly K. Kahn and Marilyn
                 Tremaine",
  title =        "Typing Biometrics: Impact of Human Learning on
                 Performance Quality",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "2",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/1891879.1891884",
  ISSN =         "1936-1955",
  bibdate =      "Mon Mar 28 12:03:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Madnick:2011:ENC,
  author =       "Stuart E. Madnick and Yang W. Lee",
  title =        "Editorial Notes: Classification and Assessment of
                 Large Amounts of Data: Examples in the Healthcare
                 Industry and Collaborative Digital Libraries",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "3",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2063504.2063505",
  ISSN =         "1936-1955",
  bibdate =      "Thu Dec 15 09:41:55 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Lauria:2011:CBT,
  author =       "Eitel J. M. Laur{\'\i}a and Alan D. March",
  title =        "Combining {Bayesian} Text Classification and Shrinkage
                 to Automate Healthcare Coding: {A} Data Quality
                 Analysis",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "3",
  pages =        "13:1--13:??",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2063504.2063506",
  ISSN =         "1936-1955",
  bibdate =      "Thu Dec 15 09:41:55 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Dalip:2011:AAD,
  author =       "Daniel Hasan Dalip and Marcos Andr{\'e}
                 Gon{\c{c}}alves and Marco Cristo and P{\'a}vel Calado",
  title =        "Automatic Assessment of Document Quality in {Web}
                 Collaborative Digital Libraries",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "3",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2011",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2063504.2063507",
  ISSN =         "1936-1955",
  bibdate =      "Thu Dec 15 09:41:55 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Muller:2012:IDQ,
  author =       "Heiko M{\"u}ller and Johann-Christoph Freytag and Ulf
                 Leser",
  title =        "Improving data quality by source analysis",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "4",
  pages =        "15:1--15:??",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2107536.2107538",
  ISSN =         "1936-1955",
  bibdate =      "Fri Mar 16 15:01:48 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In many domains, data cleaning is hampered by our
                 limited ability to specify a comprehensive set of
                 integrity constraints to assist in identification of
                 erroneous data. An alternative approach to improve data
                 quality is to exploit different data sources that
                 contain information about the same set of objects. Such
                 overlapping sources highlight hot-spots of poor data
                 quality through conflicting data values and immediately
                 provide alternative values for conflict resolution. In
                 order to derive a dataset of high quality, we can merge
                 the overlapping sources based on a quality assessment
                 of the conflicting values. The quality of the resulting
                 dataset, however, is highly dependent on our ability to
                 assess the quality of conflicting values effectively.
                 The main objective of this article is to introduce
                 methods that aid the developer of an integrated system
                 over overlapping, but contradicting sources in the task
                 of improving the quality of data. Value conflicts
                 between contradicting sources are often systematic,
                 caused by some characteristic of the different sources.
                 Our goal is to identify such systematic differences and
                 outline data patterns that occur in conjunction with
                 them. Evaluated by an expert user, the regularities
                 discovered provide insights into possible conflict
                 reasons and help to assess the quality of inconsistent
                 values. The contributions of this article are two
                 concepts of systematic conflicts: contradiction
                 patterns and minimal update sequences. Contradiction
                 patterns resemble a special form of association rules
                 that summarize characteristic data properties for
                 conflict occurrence. We adapt existing association rule
                 mining algorithms for mining contradiction patterns.
                 Contradiction patterns, however, view each class of
                 conflicts in isolation, sometimes leading to largely
                 overlapping patterns. Sequences of set-oriented update
                 operations that transform one data source into the
                 other are compact descriptions for all regular
                 differences among the sources. We consider minimal
                 update sequences as the most likely explanation for
                 observed differences between overlapping data sources.
                 Furthermore, the order of operations within the
                 sequences points out potential dependencies between
                 systematic differences. Finding minimal update
                 sequences, however, is beyond reach in practice. We
                 show that the problem already is NP-complete for a
                 restricted set of operations. In the light of this
                 intractability result, we present heuristics that lead
                 to convincing results for all examples we considered.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Gelman:2012:BMC,
  author =       "Irit Askira Gelman",
  title =        "Biases in multi-criteria, satisfying decisions due to
                 data errors",
  journal =      j-JDIQ,
  volume =       "2",
  number =       "4",
  pages =        "16:1--16:??",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2107536.2107539",
  ISSN =         "1936-1955",
  bibdate =      "Fri Mar 16 15:01:48 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "This inquiry centers on an asymmetry, or bias, in the
                 accuracy of multi-criteria, conjunctive, and
                 disjunctive decisions, which originates from
                 fundamental properties of the logical conjunction and
                 disjunction operations. A mathematical-statistical
                 analysis indicates that, as we keep adding criteria to
                 a multi-criteria conjunctive or disjunctive decision
                 rule, errors in the data produce decision errors
                 asymmetrically. As a result, in conjunctive decisions,
                 the probability of a false negative increases while the
                 probability of a false positive decreases. In contrast,
                 in disjunctive decisions, as we keep adding criteria,
                 the probability of a false positive increases while
                 that of a false negative decreases. For instance, in a
                 conjunctive business decision rule, the probability of
                 overlooking a bargain can be far greater than the
                 probability of misjudging an unattractive offer to be a
                 good one. A series of Monte Carlo simulations validates
                 the analytical findings and explores the contribution
                 of several additional factors.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Sachdeva:2012:SIS,
  author =       "Shelly Sachdeva and Subhash Bhalla",
  title =        "Semantic interoperability in standardized electronic
                 health record databases",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "1",
  pages =        "1:1--1:??",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2166788.2166789",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:12 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Different clinics and hospitals have their own
                 information systems to maintain patient data. This
                 hinders the exchange of data among systems (and
                 organizations). Hence there is a need to provide
                 standards for data exchange. In digitized form, the
                 individual patient's medical record can be stored,
                 retrieved, and shared over a network through
                 enhancements in information technology. Thus, electronic
                 health records (EHRs) should be standardized,
                 incorporating semantic interoperability. A subsequent
                 step requires that healthcare professionals and
                 patients get involved in using the EHRs, with the help
                 of technological developments. This study aims to
                 provide different approaches in understanding some
                 current and challenging concepts in health informatics.
                 Successful handling of these challenges will lead to
                 improved quality in healthcare by reducing medical
                 errors, decreasing costs, and enhancing patient care.
                 The study is focused on the following goals: (1)
                 understanding the role of EHRs; (2) understanding the
                 need for standardization to improve quality; (3)
                 establishing interoperability in maintaining EHRs; (4)
                 examining a framework for standardization and
                 interoperability (the openEHR architecture); (5)
                 identifying the role of archetypes for knowledge-based
                 systems; and (6) understanding the difficulties in
                 querying EHR data.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Brown:2012:DQT,
  author =       "Steven Brown and Trent S. Rosenbloom and Shawn P.
                 Hardenbrook and Terry Clark and Elliot Fielstein and
                 Peter Elkin and Ted Speroff",
  title =        "Documentation quality and time costs: a randomized
                 controlled trial of structured entry versus dictation",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "1",
  pages =        "2:1--2:??",
  month =        apr,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2166788.2166790",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:12 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "The Department of Veterans Affairs (VA) performs over
                 800,000 disability exams and distributes over
                 \$37 billion in disability benefits per year.
                 VA developed and deployed a computer-based disability
                 exam documentation system in order to improve exam
                 report quality and timeliness. We conducted a
                 randomized controlled trial comparing joint disability
                 examinations supported by computerized templates to the
                 examinations documented via dictation, to determine if
                 the system met the intended goals or had unintended
                 consequences. Consenting veterans were randomized to
                 undergo exams documented using computerized templates
                 or via dictation. We compared exam report quality,
                 documentation time costs, encounter length, total time
                 to fulfill an exam request with a finalized exam
                 report, and veteran satisfaction. Computer-based
                 templates resulted in disability exam reports that had
                 higher quality scores (p = 0.042) and were returned to
                 the requesting office faster than exam reports created
                 via dictation (p = 0.02). Documentation time and veteran
                 satisfaction were similar for both the documentation
                 techniques. Encounter length was significantly longer
                 for the template group. Computer-based templates
                 impacted the VA disability evaluation system by
                 improving report quality scores and production time and
                 lengthening encounter times. Oversight bodies have
                 called for mandated use of computer-based templates
                 nationwide. We believe mandates regarding use of health
                 information technology should be guided by data
                 regarding its positive and negative impacts.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Sunyaev:2012:SCD,
  author =       "Ali Sunyaev and Dmitry Chornyi",
  title =        "Supporting chronic disease care quality: Design and
                 implementation of a health service and its integration
                 with electronic health records",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "2",
  pages =        "3:1--3:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2184442.2184443",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:12 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Chronic medical conditions take a huge toll on lives
                 of a growing number of people and are a major
                 contributor to the rising costs in healthcare. As
                 patients are increasingly willing to take an active
                 part in managing their conditions, chronic disease
                 self-management programs and information systems that
                 support them are recognized for their potential to
                 improve the quality of healthcare delivery. These
                 programs often rely on recording longitudinal patient
                 data and analyzing it. Therefore, maintaining
                 appropriate data quality is important for
                 self-management programs to be efficient and safe. We
                 designed and implemented a prototype of a health
                 self-management service for chronically ill people. It
                 is a distributed application that supports patients
                 with diabetes in tracking their blood glucose levels.
                 The main design goals were usability, extensibility,
                 security, and interoperability. The system integrates
                 with the Microsoft HealthVault and Google Health
                 personal health record platforms. It utilizes
                 industry-strength storage and security mechanisms, is
                 scalable, and as a result, can be used to gather,
                 securely store, and analyze patient data over long
                 periods of time. In this article we examine how
                 software information technology can support chronic
                 disease self-management and its impact on the quality
                 of patient data. Furthermore, we describe the
                 requirements that drove the system's development, its
                 architecture, and design decisions.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Elizabeth:2012:NSA,
  author =       "D. Shiloah Elizabeth and H. Khanna Nehemiah and C.
                 Sunil Retmin Raj and A. Kannan",
  title =        "A novel segmentation approach for improving diagnostic
                 accuracy of {CAD} systems for detecting lung cancer
                 from chest computed tomography images",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "2",
  pages =        "4:1--4:??",
  month =        may,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2184442.2184444",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:12 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Segmentation of lung tissue is an important and
                 challenging task in any computer aided diagnosis
                 system. The accuracy of the segmentation subsystem
                 determines the performance of the other subsystems in
                 any computer aided diagnosis system based on image
                 analysis. We propose a novel technique for segmentation
                 of lung tissue from computed tomography of the chest.
                 Manual segmentation of lung parenchyma becomes
                 difficult with an enormous volume of images. The goal
                 of this work is to present an automated approach to
                 segmentation of lung parenchyma from the rest of the
                 chest CT image. The approach involves the conventional
                 optimal thresholding technique and operations based on
                 convex edge and centroid properties of the lung region.
                 The segmentation technique proposed in this article can
                 be used to preprocess lung images given to a computer
                 aided diagnosis system for diagnosis of lung disorders.
                 This improves the diagnostic performance of the system.
                 This has been tested by using it in a computer aided
                 diagnosis system that was used for detection of lung
                 cancer from chest computed tomography images. The
                 results obtained show that the lungs can be correctly
                 segmented even in the presence of peripheral pathology
                 bearing regions; pathology bearing regions that could
                 not be detected using a CAD system that applies optimal
                 thresholding could be detected using a CAD system using
                 our proposed approach for segmentation of lungs.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Yakout:2012:EPA,
  author =       "Mohamed Yakout and Mikhail J. Atallah and Ahmed
                 Elmagarmid",
  title =        "Efficient and Practical Approach for Private Record
                 Linkage",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "3",
  pages =        "5:1--5:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2287714.2287715",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:13 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Record linkage is used to associate entities from
                 multiple data sources. For example, two organizations
                 contemplating a merger may want to know how common
                 their customer bases are so that they may better assess
                 the benefits of the merger. Another example is a
                 database of people who are forbidden from a certain
                 activity by regulators, which may need to be compared to
                 a list of people engaged in that activity. The autonomous
                 entities who wish to carry out the record matching
                 computation are often reluctant to fully share their
                 data; they fear losing control over its subsequent
                 dissemination and usage, or they want to ensure privacy
                 because the data is proprietary or confidential, and/or
                 they are cautious simply because privacy laws forbid
                 its disclosure or regulate the form of that disclosure.
                 In such cases, the problem of carrying out the linkage
                 computation without full data exchange has been called
                 private record linkage. Previous private record linkage
                 techniques have made use of a third party. We provide
                 efficient techniques for private record linkage that
                 improve on previous work in that (1) our techniques
                 make no use of a third party, and (2) they achieve much
                 better performance than previous schemes in terms of
                 their execution time while maintaining acceptable
                 quality of output compared to nonprivacy settings. Our
                 protocol consists of two phases. The first phase
                 primarily produces candidate record pairs for matching,
                 by carrying out a very fast (but not accurate) matching
                 between such pairs of records. The second phase is a
                 novel protocol for efficiently computing distances
                 between each candidate pair (without any expensive
                 cryptographic operations such as modular
                 exponentiations). Our experimental evaluation of our
                 approach validates these claims.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Yang:2012:ECD,
  author =       "Yanjuan Yang and Michael Mannino",
  title =        "An Experimental Comparison of a Document Deception
                 Detection Policy using Real and Artificial Deception",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "3",
  pages =        "6:1--6:??",
  month =        aug,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2287714.2287716",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:13 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Developing policies to screen documents for deception
                 is often hampered by the cost of data collection and
                 the inability to evaluate policy alternatives due to
                 lack of data. To lower data collection costs and
                 increase the amount of data, artificially generated
                 deception data can be used, but the impact of using
                 artificially generated deception data is not well
                 understood. This article studies the impact of
                 artificially generated deception on document screening
                 policies. The deception and truth data were collected
                 from financial aid applications, a document-centric
                 area with limited resources for screening. Real
                 deception was augmented with artificial data generated
                 by noise and deception generation models. Using the
                 real data and artificially generated data, we designed
                 an innovative experiment with deception type and
                 deception rate as factors, and harmonic mean and cost
                 as outcome variables. We used two budget models (fixed
                 and variable) typically employed by financial aid
                 offices to measure the cost of noncompliance in
                 financial aid applications. The analysis included an
                 evaluation of a common policy for deception screening
                 using both fixed and varying screening rates. The
                 results of the experiment provided evidence of similar
                 performance of screening policy with real and
                 artificial deception, suggesting the possibility of
                 using artificially generated deception to reduce the
                 costs associated with obtaining training data.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Robb:2012:INU,
  author =       "David A. Robb and Paul L. Bowen and A. Faye Borthick
                 and Fiona H. Rohde",
  title =        "Improving New Users' Query Performance: Deterring
                 Premature Stopping of Query Revision with Information
                 for Forming Ex Ante Expectations",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "4",
  pages =        "7:1--7:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2348828.2348829",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "As the volume of data in organizational databases
                 grows, organizations are seeking to use this data to
                 improve organizational success. To this end, users are
                 being asked to query these databases to provide
                 information to help answer questions posed by key
                 management personnel. Users who have had extensive
                 experience with an organization's data can often detect
                 the presence of errors in their queries when query
                 results do not correspond to their ex ante
                 expectations. New users, however, are less familiar
                 with the data they will be querying. Having no, or
                 limited, ex ante expectations for query results, new
                 users may be unaware that the result produced by their
                 query is incorrect. Unwarranted confidence in the
                 correctness of their queries predisposes these users to
                 stop looking for query errors even when their queries
                 still contain errors. This behavior, premature stopping
                 of query revision, prompts investigating whether new
                 users' query performance would improve if they were not
                 only provided with, but used, readily available
                 information to form ex ante expectations. Our results
                 demonstrated a threshold effect in new users heeding
                 information for forming ex ante expectations. That is,
                 the mere availability of information for forming ex
                 ante expectations made no difference in query
                 performance. When admonishing users to heed ex ante
                 information, however, there was an associated increase
                 in the accuracy of their queries. These results suggest
                 that users unfamiliar with a particular database might
                 make fewer query errors if they not only received
                 readily available information but were then prompted to
                 use the information to form ex ante expectations for
                 query results.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Varol:2012:HMA,
  author =       "Cihan Varol and Coskun Bayrak",
  title =        "Hybrid Matching Algorithm for Personal Names",
  journal =      j-JDIQ,
  volume =       "3",
  number =       "4",
  pages =        "8:1--8:??",
  month =        sep,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2348828.2348830",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib;
                 http://www.math.utah.edu/pub/tex/bib/spell.bib",
  abstract =     "Companies acquire personal information from phone,
                 World Wide Web, or email in order to sell or send an
                 advertisement about their product. However, when this
                 information is acquired, moved, copied, or edited, the
                 data may lose its quality. Often, the use of data
                 administrators or a tool that has limited capabilities
                 to correct the mistyped information can cause many
                 problems. Moreover, most of the correction techniques
                 are particularly implemented for the words used in
                 daily conversations. Since personal names have
                 different characteristics compared to general text, a
                 hybrid matching algorithm (PNRS) which employs phonetic
                 encoding, string matching and statistical facts to
                 provide a possible candidate for misspelled names is
                 developed. At the end, the efficiency of the proposed
                 algorithm is compared with other well known spelling
                 correction techniques.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{ODonoghue:2012:ISI,
  author =       "John O'Donoghue and Jane Grimson and Katherine
                 Seelman",
  title =        "Introduction to the Special Issue on Information
                 Quality: The Challenges and Opportunities in Healthcare
                 Systems and Services",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "1",
  pages =        "1:1--1:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2378016.2378017",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Collins:2012:CGF,
  author =       "Claire Collins and Kelly Janssens",
  title =        "Creating a General (Family) Practice Epidemiological
                 Database in {Ireland} --- Data Quality Issue
                 Management",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "1",
  pages =        "2:1--2:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2378016.2378018",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In Ireland, while detailed information is available
                 regarding hospital attendance, little is known
                 regarding general (family) practice attendance.
                 However, it is conservatively estimated that there are
                 almost nine times as many general practice encounters
                 as there are hospital encounters each year in
                 Ireland. This represents a very significant gap in
                 health information. Indeed, general practice has been
                 shown in other countries to be an important and rich
                 source of information about the health of the
                 population, their behaviors and their utilization of
                 health services. Funded by the Health Information and
                 Quality Authority (HIQA), the Irish College of General
                 Practitioners (ICGP) undertook a feasibility study of
                 diagnostic coding of routinely entered patient data and
                 the creation of a national general practice morbidity
                 and epidemiological database (GPMED project). This
                 article outlines the process of data quality issue
                 management undertaken. The study's findings suggest
                 that the quality of data collection and reporting
                 structures available in general practice throughout
                 Ireland at the outset of this project was not adequate
                 to permit the creation of a database of sufficient
                 quality for service planning and policy or
                 epidemiological research. Challenges include the dearth
                 of a minimum standard of data recorded in consultations
                 by GPs and the absence of the digital data recording
                 and exporting infrastructure within Irish patient
                 management software systems. In addition, there is at
                 present a lack of recognition regarding the value of
                 such data for patient management and service
                 planning---including, importantly, data collectors who
                 do not fully accept the merit of maintaining data,
                 which has a direct consequence for data quality. The
                 work of this project has substantial implications for
                 the data available to the health sector in Ireland and
                 contributes to the knowledge base internationally
                 regarding general practice morbidity data.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Cure:2012:IDQ,
  author =       "Olivier Cur{\'e}",
  title =        "Improving the Data Quality of Drug Databases using
                 Conditional Dependencies and Ontologies",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "1",
  pages =        "3:1--3:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2378016.2378019",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Many health care systems and services exploit drug
                 related information stored in databases. The poor data
                 quality of these databases, e.g. inaccuracy of drug
                 contraindications, can lead to catastrophic
                 consequences for the health condition of patients.
                 Hence it is important to ensure their quality in terms
                 of data completeness and soundness. In the database
                 domain, standard Functional Dependencies (FDs) and
                 INclusion Dependencies (INDs), have been proposed to
                 prevent the insertion of incorrect data. But they are
                 generally not expressive enough to represent a
                 domain-specific set of constraints. To this end,
                 conditional dependencies, i.e. standard dependencies
                 extended with tableau patterns containing constant
                 values, have been introduced and several methods have
                 been proposed for their discovery and representation.
                 The quality of drug databases can be considerably
                 improved by their usage. Moreover, pharmacology
                 information is inherently hierarchical and many
                 standards propose graph structures to represent it,
                 e.g. the Anatomical Therapeutic Chemical classification
                 (ATC) or OpenGalen's terminology. In this article, we
                 emphasize that the technologies of the Semantic Web are
                 adapted to represent these hierarchical structures,
                 i.e. in RDFS and OWL. We also present a solution for
                 representing conditional dependencies using a query
                 language defined for these graph oriented structures,
                 namely SPARQL. The benefits of this approach are
                 interoperability with applications and ontologies of
                 the Semantic Web as well as a reasoning-based query
                 execution solution to clean underlying databases.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{McNaull:2012:DIQ,
  author =       "James McNaull and Juan Carlos Augusto and Maurice
                 Mulvenna and Paul McCullagh",
  title =        "Data and Information Quality Issues in Ambient
                 Assisted Living Systems",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "1",
  pages =        "4:1--4:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2378016.2378020",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Demographic aging, as a result of people living for
                 longer, has put an increased burden on health and
                 social care provision across most of the economies of
                 the developed and developing world. In order to cope
                 with the greater numbers of older people, together with
                 increasing prevalence of chronic diseases, governments
                 are looking to new ways to provide care and support to
                 older people and their care providers. A growing trend
                 is where health and social care providers are moving
                 towards the use of assisted living technologies to
                 provide care and assistance in the home. In this
                 article, the research area of Ambient Assisted Living
                 (AAL) systems is examined and the data, information, and
                 higher-level contextual knowledge quality issues in
                 relation to these systems are discussed. Lack of
                 quality control may result in an AAL system providing
                 assistance and support based upon incorrect data,
                 information and knowledge inputs, and this may have a
                 detrimental effect on the person making use of the
                 system. We propose a model whereby contextual knowledge
                 gained during the AAL system's reasoning cycle can be
                 fed back to aid in further quality checking at the
                 various architectural layers, and a realistic AAL
                 scenario is provided to support this. Future research
                 should be conducted in these areas, with the
                 requirement of building quality criteria into the
                 design and implementation of AAL systems.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{ODonoghue:2012:DMW,
  author =       "John O'Donoghue and John Herbert",
  title =        "Data Management within {mHealth} Environments: Patient
                 Sensors, Mobile Devices, and Databases",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "1",
  pages =        "5:1--5:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2378016.2378021",
  ISSN =         "1936-1955",
  bibdate =      "Thu Nov 8 18:27:14 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Pervasive environments generate large quantities of
                 data, originating from backend servers, portable
                 devices, and wireless mobile sensors. Pervasive sensing
                 devices that monitor properties of the environment
                 (including human beings) can be a large data source.
                 Unprocessed datasets may include data that is faulty
                 and irrelevant, and data that is important and useful.
                 If not managed correctly, the large amount of data from
                 a data-rich pervasive environment may result in
                 information overload or delivery of incorrect
                 information. Context-sensitive quality data management
                 aims to gather, verify, process, and manage the
                 multiple data sources in a pervasive environment in
                 order to deliver high quality, relevant information to
                 the end-user. Managing the quality of data from
                 different sources, correlating related data, and making
                 use of context, are all essential in providing end
                 users with accurate and meaningful data in real time.
                 This requirement is especially true for critical
                 applications such as in a medical environment. This
                 article presents the Data Management System (DMS)
                 architecture. It is designed to deliver quality data
                 service to its users. The DMS architecture employs an
                 agent-based middleware to intelligently and effectively
                 manage all pervasive data sources, and to make use of
                 context to deliver relevant information to the
                 end-user. Two of the DMS components are presented: (1)
                 data validation and (2) data consistency. The DMS
                 components have been rigorously evaluated using various
                 medical-based test cases. This article demonstrates a
                 careful, precise approach to data based on the quality
                 of the data and the context of its use. It emphasises
                 the DMS architecture and the role of software agents in
                 providing quality data management.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Talburt:2013:SIE,
  author =       "John R. Talburt",
  title =        "Special Issue on Entity Resolution Overview: The
                 Criticality of Entity Resolution in Data and
                 Information Quality",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "2",
  pages =        "6:1--6:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435221.2435222",
  ISSN =         "1936-1955",
  bibdate =      "Sat Jun 22 12:13:00 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Song:2013:DIE,
  author =       "Dezhao Song and Jeff Heflin",
  title =        "Domain-Independent Entity Coreference for Linking
                 Ontology Instances",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "2",
  pages =        "7:1--7:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435221.2435223",
  ISSN =         "1936-1955",
  bibdate =      "Sat Jun 22 12:13:00 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "The objective of entity coreference is to determine if
                 different mentions (e.g., person names, place names,
                 database records, ontology instances, etc.) refer to
                 the same real-world object. Entity coreference
                 algorithms can be used to detect duplicate database
                 records and to determine if two Semantic Web instances
                 represent the same underlying real-world entity. The key
                 issues in developing an entity coreference algorithm
                 include how to locate context information and how to
                 utilize the context appropriately. In this article, we
                 present a novel entity coreference algorithm for
                 ontology instances. For scalability reasons, we select
                 a neighborhood of each instance from an RDF graph. To
                 determine the similarity between two instances, our
                 algorithm computes the similarity between comparable
                 property values in the neighborhood graphs. The
                 similarity of distinct URIs and blank nodes is computed
                 by comparing their outgoing links. In an attempt to
                 reduce the impact of distant nodes on the final
                 similarity measure, we explore a distance-based
                 discounting approach. To provide the best possible
                 domain-independent matches, we propose an approach to
                 compute the discriminability of triples in order to
                 assign weights to the context information. We evaluated
                 our algorithm using different instance categories from
                 five datasets. Our experiments show that the best
                 results are achieved by including both our discounting
                 and triple discrimination approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Nuray-Turan:2013:ACS,
  author =       "Rabia Nuray-Turan and Dmitri V. Kalashnikov and Sharad
                 Mehrotra",
  title =        "Adaptive Connection Strength Models for
                 Relationship-Based Entity Resolution",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "2",
  pages =        "8:1--8:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435221.2435224",
  ISSN =         "1936-1955",
  bibdate =      "Sat Jun 22 12:13:00 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Entity Resolution (ER) is a data quality challenge
                 that deals with ambiguous references in data and whose
                 task is to identify all references that co-refer. Due
                 to practical significance of the ER problem, many
                 creative ER techniques have been proposed in the past,
                 including those that analyze relationships that exist
                 among entities in data. Such approaches view the
                 database as an entity-relationship graph, where direct
                 and indirect relationships correspond to paths in the
                 graph. These techniques rely on measuring the
                 connection strength among various nodes in the graph by
                 using a connection strength (CS) model. While such
                 approaches have demonstrated significant advantage over
                 traditional ER techniques, currently they also have a
                 significant limitation: the CS models that they use are
                 intuition-based fixed models that tend to behave well
                 in general, but are very generic and not tuned to a
                 specific domain, leading to suboptimal result quality.
                 Hence, in this article we propose an approach that
                 employs supervised learning to adapt the connection
                 strength measure to the given domain using the
                 available past/training data. The adaptive approach has
                 several advantages: it increases both the quality and
                 efficiency of ER and it also minimizes the domain
                 analyst participation needed to tune the CS model to
                 the given domain. The extensive empirical evaluation
                 demonstrates that the proposed approach reaches up to
                 8\% higher accuracy than the graph-based ER methods
                 that use fixed and intuition-based CS models.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Panse:2013:IHU,
  author =       "Fabian Panse and Maurice van Keulen and Norbert
                 Ritter",
  title =        "Indeterministic Handling of Uncertain Decisions in
                 Deduplication",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "2",
  pages =        "9:1--9:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435221.2435225",
  ISSN =         "1936-1955",
  bibdate =      "Sat Jun 22 12:13:00 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "In current research and practice, deduplication is
                 usually considered as a deterministic approach in which
                 database tuples are either declared to be duplicates or
                 not. In ambiguous situations, however, it is often not
                 completely clear-cut which tuples represent the same
                 real-world entity. In deterministic approaches, many
                 realistic possibilities may be ignored, which in turn
                 can lead to false decisions. In this article, we
                 present an indeterministic approach for deduplication
                 by using a probabilistic target model including
                 techniques for proper probabilistic interpretation of
                 similarity matching results. Thus, instead of deciding
                 for one of the most likely situations, all realistic
                 situations are modeled in the resultant data. This
                 approach minimizes the negative impact of false
                 decisions. Moreover, the deduplication process becomes
                 almost fully automatic and human effort can be largely
                 reduced. To increase applicability, we introduce
                 several semi-indeterministic methods that heuristically
                 reduce the set of indeterministically handled decisions
                 in several meaningful ways. We also describe a
                 full-indeterministic method for theoretical and
                 presentational reasons.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Zhou:2013:GLC,
  author =       "Yinle Zhou and Eric Nelson and Fumiko Kobayashi and
                 John R. Talburt",
  title =        "A Graduate-Level Course on Entity Resolution and
                 Information Quality: {A} Step toward {ER} Education",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "2",
  pages =        "10:1--10:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2435221.2435226",
  ISSN =         "1936-1955",
  bibdate =      "Sat Jun 22 12:13:00 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "This article discusses the topics, approaches, and
                 lessons learned in teaching a graduate-level course
                 covering entity resolution (ER) and its relationship to
                 information quality (IQ). The course surveys a broad
                 spectrum of ER topics and activities including entity
                 reference extraction, entity reference preparation,
                 entity reference resolution techniques, entity identity
                 management, and entity relationship analysis. The
                 course content also attempts to balance aspects of ER
                 theory with practical application through a series of
                 laboratory exercises coordinated with the lecture
                 topics. As an additional teaching aid, a configurable,
                 open-source entity resolution engine (OYSTER) was
                 developed that allows students to experiment with
                 different types of ER architectures including
                 merge-purge, record linking, identity resolution, and
                 identity capture.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Cao:2013:NAD,
  author =       "Lan Cao and Hongwei Zhu",
  title =        "Normal accidents: Data quality problems in
                 {ERP}-enabled manufacturing",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "3",
  pages =        "11:1--11:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1936-1955",
  bibdate =      "Sat Jun 22 12:13:05 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "The efficient operation of Enterprise Resource
                 Planning (ERP) systems largely depends on data quality.
                 ERP can improve data quality and information sharing
                 within an organization. It can also pose challenges to
                 data quality. While it is well known that data quality
                 is important in ERP systems, most existing research has
                 focused on identifying the factors affecting the
                 implementation and the business values of ERP. With
                 normal accident theory as a theoretical lens, we
                 examine data quality problems in ERP using a case study
                 of a large, fast-growing multinational manufacturer
                 headquartered in China. Our findings show that
                 organizations that have successfully implemented ERP
                 can still experience certain data quality problems. We
                 identify major data quality problems in data
                 production, storage and maintenance, and utilization
                 processes. We also analyze the causes of these data
                 quality problems by linking them to certain
                 characteristics of ERP systems within an organizational
                 context. Our analysis shows that problems resulting
                 from the tight coupling effects and the complexity of
                 ERP-enabled manufacturing systems can be inevitable.
                 This study will help researchers and practitioners
                 formulate data management strategies that are effective
                 in the presence of certain ``normal'' data quality
                 problems.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Biran:2013:CII,
  author =       "Dov Biran and Michael H. Zack and Richard J. Briotta",
  title =        "Competitive intelligence and information quality: a
                 game-theoretic perspective",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "3",
  pages =        "12:1--12:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1936-1955",
  bibdate =      "Sat Jun 22 12:13:05 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "To better understand a competitor's tactical and
                 strategic plans, companies need to take a closer look
                 at competitive intelligence or they risk missing
                 lucrative opportunities. Because of this there is a
                 growing interest in competitive intelligence and
                 intelligence information gathering systems (IIS). This
                 article uses game-theoretic concepts to develop an
                 analytic framework to assess the value of deploying a
                 competitive intelligence gathering information system.
                 Modeling the competitive environment as a game provides
                 a useful approach to study and evaluate competitive
                 strategies given diverse assumptions about the quality
                 of the information known by the players. When
                 determining the value of deploying an IIS, decision
                 makers need to examine three components of the
                 competitive environment: the competitive rules of the
                 game, the state of player knowledge, and the
                 reliability of the information gathered. This framework
                 focuses on competitive environments where the players'
                 state of knowledge (i.e., common versus covert
                 knowledge) and the reliability of the information
                 generated are essential to the decision making process.
                 The article concludes with implications for research
                 and practice.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Joglekar:2013:AAD,
  author =       "Nitin R. Joglekar and Edward G. Anderson and G.
                 Shankaranarayanan",
  title =        "Accuracy of aggregate data in distributed project
                 settings: Model, analysis and implications",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "3",
  pages =        "13:1--13:??",
  month =        may,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "1936-1955",
  bibdate =      "Sat Jun 22 12:13:05 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "We examine the management of data accuracy in
                 inter-organizational data exchanges using the context
                 of distributed software projects. Organizations
                 typically manage projects by outsourcing portions of
                 the project to partners. Managing a portfolio of such
                 projects requires sharing data regarding the status of
                 work-in-progress residing with the partners and
                 estimates of these projects' completion times.
                 Portfolio managers use these data to assign projects to
                 be outsourced to partners. These data are rarely
                 accurate. Unless these data are filtered, inaccuracies
                 can lead to myopic and expensive sourcing decisions. We
                 develop a model that uses project-status data to
                 identify an optimal assignment of projects to be
                 outsourced. This model permits corruption of
                 project-status data. We use this model to compute the
                 costs of using perfect versus inaccurate project-status
                 data and show that the costs of deviation from optimal
                 are sizable when the inaccuracy in the data is
                 significant. We further propose a filter to correct
                 inaccurate project-status data and generate an estimate
                  of true progress. With this filter, depending on the
                  relative magnitudes of errors, we show that the
                  accuracy of project-status data can be improved and
                  that the associated economic benefit is significant.
                  We illustrate the
                 improvement in accuracy and associated economic benefit
                 by instantiating the model and the filter. We further
                 elaborate on how the model parameters may be estimated
                 and used in practice.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
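
%%% The Joglekar et al. abstract above describes a filter that corrects
%%% inaccurate project-status reports to estimate true progress.  The
%%% Python sketch below is a minimal stand-in (simple exponential
%%% smoothing), not the authors' filter; the smoothing constant and the
%%% sample reports are assumptions.
%%%
%%%   def filter_progress(reports, alpha=0.3):
%%%       """Smooth noisy work-in-progress fractions in [0, 1].
%%%
%%%       alpha weights each new report against the running estimate."""
%%%       estimate = reports[0]
%%%       estimates = [round(estimate, 3)]
%%%       for r in reports[1:]:
%%%           estimate = alpha * r + (1 - alpha) * estimate
%%%           estimates.append(round(min(estimate, 1.0), 3))
%%%       return estimates
%%%
%%%   # Illustrative only: a partner over-reports progress early on.
%%%   reported = [0.30, 0.55, 0.60, 0.62, 0.70, 0.85, 0.90, 1.00]
%%%   print(filter_progress(reported))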

@Article{Raschid:2014:E,
  author =       "Louiqa Raschid",
  title =        "Editorial",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "4",
  pages =        "14:1--14:??",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2579167",
  ISSN =         "1936-1955",
  bibdate =      "Tue May 27 16:54:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}

@Article{Wijnhoven:2014:VBF,
  author =       "Fons Wijnhoven and Chintan Amrit and Pim Dietz",
  title =        "Value-Based File Retention: File Attributes as File
                 Value and Information Waste Indicators",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "4",
  pages =        "15:1--15:??",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2567656",
  ISSN =         "1936-1955",
  bibdate =      "Tue May 27 16:54:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Several file retention policy methods propose that a
                 file retention policy should be based on file value.
                 Though such a retention policy might increase the value
                 of accessible files, the method to arrive at such a
                 policy is under-researched. This article discusses how
                 one can arrive at a method for developing file
                 retention policies based on the use values of files.
                 The method's applicability is initially assessed
                 through a case study at Capgemini, Netherlands. In the
                 case study, we hypothesize that one can develop a file
                 retention policy by testing causal relations between
                 file attributes (as used by file retention methods) and
                  the use value of files. Unfortunately, most file
                  attributes used by file retention methods correlate
                  only weakly with file value, leading to the conclusion
                  that these methods do not reliably separate high-value
                  from low-value files. This implies either that the
                  attributes used in our study are ineffective or that
                  our conceptualization of file value is in error. We
                  pursue the latter possibility and develop indicators
                  for file utility (with low utility being waste). With
                  this approach we were able to detect waste files, in a
                  sample of files, with an accuracy of 80\%. We
                  therefore suggest not only further research in
                  information waste detection as part of a file
                  retention policy, but also further exploration of
                  other file attributes that could better predict file
                  value and file utility.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
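
%%% The Wijnhoven et al. abstract above uses file attributes as indicators
%%% of file value and information waste.  The Python sketch below is a
%%% hypothetical illustration of attribute-based scoring, not the study's
%%% indicators; the weights and thresholds are assumptions.
%%%
%%%   import os, time
%%%
%%%   def waste_score(path, max_age_days=365, big_bytes=50_000_000):
%%%       """Crude utility indicator from file attributes.
%%%
%%%       Older, larger, rarely accessed files score closer to 1.0 ("waste")."""
%%%       st = os.stat(path)
%%%       age_days = (time.time() - st.st_atime) / 86400
%%%       age_part = min(age_days / max_age_days, 1.0)   # staleness
%%%       size_part = min(st.st_size / big_bytes, 1.0)   # storage cost
%%%       return 0.7 * age_part + 0.3 * size_part
%%%
%%%   # Flag retention-review candidates in the current directory.
%%%   for name in os.listdir("."):
%%%       if os.path.isfile(name) and waste_score(name) > 0.8:
%%%           print("possible information waste:", name)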

@Article{Fan:2014:IBR,
  author =       "Wenfei Fan and Shuai Ma and Nan Tang and Wenyuan Yu",
  title =        "Interaction between Record Matching and Data
                 Repairing",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "4",
  pages =        "16:1--16:??",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2567657",
  ISSN =         "1936-1955",
  bibdate =      "Tue May 27 16:54:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Central to a data cleaning system are record matching
                 and data repairing. Matching aims to identify tuples
                  that refer to the same real-world object, and repairing
                  aims to make a database consistent by fixing errors in
                  the data using integrity constraints. These are
                 typically treated as separate processes in current data
                 cleaning systems, based on heuristic solutions. This
                 article studies a new problem in connection with data
                 cleaning, namely the interaction between record
                 matching and data repairing. We show that repairing can
                 effectively help us identify matches, and vice versa.
                 To capture the interaction, we provide a uniform
                 framework that seamlessly unifies repairing and
                 matching operations to clean a database based on
                 integrity constraints, matching rules, and master data.
                 We give a full treatment of fundamental problems
                 associated with data cleaning via matching and
                 repairing, including the static analyses of constraints
                 and rules taken together, and the complexity,
                 termination, and determinism analyses of data cleaning.
                  We show that these problems are hard, ranging from
                  NP-complete and coNP-complete to PSPACE-complete.
                 Nevertheless, we propose efficient algorithms to clean
                 data via both matching and repairing. The algorithms
                 find deterministic fixes and reliable fixes based on
                 confidence and entropy analyses, respectively, which
                 are more accurate than fixes generated by heuristics.
                 Heuristic fixes are produced only when deterministic or
                 reliable fixes are unavailable. We experimentally
                 verify that our techniques can significantly improve
                 the accuracy of record matching and data repairing that
                 are taken as separate processes, using real-life and
                 synthetic data.",
  acknowledgement = ack-nhfb,
  articleno =    "16",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
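
%%% The Fan et al. abstract above argues that record matching and data
%%% repairing help each other when interleaved.  The Python sketch below
%%% is a toy illustration of that interaction, not the authors' unified
%%% framework; the matching rule, editing rule, master data, and records
%%% are made up.
%%%
%%%   master = {"44101": {"city": "Cleveland", "state": "OH"}}  # trusted master data
%%%
%%%   records = [
%%%       {"name": "J. Smith",   "zip": "44101", "city": "Cleveland", "state": "OH"},
%%%       {"name": "John Smith", "zip": "44101", "city": "Clevland",  "state": ""},
%%%   ]
%%%
%%%   def repair(rec):
%%%       """Editing rule: fill city/state from master data keyed on zip."""
%%%       if rec["zip"] in master:
%%%           rec.update(master[rec["zip"]])
%%%       return rec
%%%
%%%   def matches(a, b):
%%%       """Matching rule: same city and same surname token."""
%%%       return a["city"] == b["city"] and a["name"].split()[-1] == b["name"].split()[-1]
%%%
%%%   print("before repair:", matches(*records))   # False: the 'Clevland' typo blocks it
%%%   records = [repair(r) for r in records]
%%%   print("after repair:", matches(*records))    # True: repairing enables the match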

@Article{Martin:2014:MAE,
  author =       "Nigel Martin and Alexandra Poulovassilis and Jianing
                 Wang",
  title =        "A Methodology and Architecture Embedding Quality
                 Assessment in Data Integration",
  journal =      j-JDIQ,
  volume =       "4",
  number =       "4",
  pages =        "17:1--17:??",
  month =        may,
  year =         "2014",
  CODEN =        "????",
  DOI =          "http://dx.doi.org/10.1145/2567663",
  ISSN =         "1936-1955",
  bibdate =      "Tue May 27 16:54:25 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/jdqi/;
                 http://www.math.utah.edu/pub/tex/bib/jdiq.bib",
  abstract =     "Data integration aims to combine heterogeneous
                 information sources and to provide interfaces for
                 accessing the integrated resource. Data integration is
                 a collaborative task that may involve many people with
                 different degrees of experience, knowledge of the
                 application domain, and expectations relating to the
                 integrated resource. It may be difficult to determine
                 and control the quality of an integrated resource due
                 to these factors. In this article, we propose a data
                 integration methodology that has embedded within it
                 iterative quality assessment and improvement of the
                 integrated resource. We also propose an architecture
                 for the realisation of this methodology. The quality
                 assessment is based on an ontology representation of
                 different users' quality requirements and of the main
                 elements of the integrated resource. We use description
                 logic as the formal basis for reasoning about users'
                 quality requirements and for validating that an
                 integrated resource satisfies these requirements. We
                 define quality factors and associated metrics which
                 enable the quality of alternative global schemas for an
                 integrated resource to be assessed quantitatively, and
                 hence the improvement which results from the refinement
                 of a global schema following our methodology to be
                 measured. We evaluate our approach through a
                 large-scale real-life case study in biological data
                 integration in which an integrated resource is
                 constructed from three autonomous proteomics data
                 sources.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "Journal of Data and Information Quality (JDIQ)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1191",
}
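
%%% The Martin et al. abstract above defines quality factors and metrics
%%% for comparing alternative global schemas against users' quality
%%% requirements (via an ontology and description-logic reasoning in the
%%% article itself).  The Python sketch below is only a hypothetical
%%% threshold-style comparison; the factor names, scores, and thresholds
%%% are assumptions.
%%%
%%%   requirements = {"completeness": 0.80, "consistency": 0.90, "minimality": 0.60}
%%%
%%%   schemas = {
%%%       "global_schema_v1": {"completeness": 0.75, "consistency": 0.95, "minimality": 0.70},
%%%       "global_schema_v2": {"completeness": 0.85, "consistency": 0.92, "minimality": 0.65},
%%%   }
%%%
%%%   def satisfies(scores, reqs):
%%%       """A schema meets the requirements if every factor reaches its threshold."""
%%%       return all(scores[f] >= t for f, t in reqs.items())
%%%
%%%   for name, scores in schemas.items():
%%%       mean = sum(scores.values()) / len(scores)
%%%       print(name, "meets" if satisfies(scores, requirements) else "fails",
%%%             f"requirements; mean quality {mean:.2f}")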