%%% -*-BibTeX-*-
%%% ====================================================================
%%%  BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.98",
%%%     date            = "19 March 2024",
%%%     time            = "08:15:22 MST",
%%%     filename        = "vldbj.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "https://www.math.utah.edu/~beebe",
%%%     checksum        = "25760 45484 242880 2371637",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "BibTeX; bibliography; Very Large Data Bases
%%%                        Journal; VLDB Journal",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE bibliography of
%%%                        publications in the VLDB journal: Very Large
%%%                        Data Bases (CODEN VLDBFR, ISSN 1066-8888
%%%                        (print), 0949-877X (electronic)), originally
%%%                        published by Springer-Verlag on behalf of the
%%%                        VLDB Endowment, and now published by the ACM.
%%%
%%%                        Publication of the VLDB Journal began with
%%%                        volume 1, number 1, in 1992, and the journal
%%%                        is normally published quarterly, although
%%%                        occasionally issues are combined or volumes
%%%                        are split across year boundaries.
%%%
%%%                        There is an editorial World Wide Web site at
%%%
%%%                            http://SunSITE.Informatik.RWTH-Aachen.DE/dblp/db/journals/vldb/
%%%
%%%                        and publisher Web sites at
%%%
%%%                            http://portal.acm.org/toc.cfm?id=J869
%%%                            http://link.springer.de/link/service/journals/00778/index.htm
%%%
%%%                        At version 1.98, the year coverage looked
%%%                        like this:
%%%
%%%                             1992 (   7)    2003 (  23)    2014 (  44)
%%%                             1993 (  19)    2004 (  23)    2015 (  37)
%%%                             1994 (  22)    2005 (  22)    2016 (  40)
%%%                             1995 (  24)    2006 (  24)    2017 (  39)
%%%                             1996 (  18)    2007 (  26)    2018 (  38)
%%%                             1997 (  22)    2008 (  68)    2019 (  41)
%%%                             1998 (  22)    2009 (  53)    2020 (  63)
%%%                             1999 (  13)    2010 (  45)    2021 (  46)
%%%                             2000 (  29)    2011 (  41)    2022 (  59)
%%%                             2001 (  25)    2012 (  40)    2023 (  59)
%%%                             2002 (  23)    2013 (  38)    2024 (  24)
%%%
%%%                             Article:       1117
%%%
%%%                             Total entries: 1117
%%%
%%%                        This bibliography was prepared largely from
%%%                        the Web pages at the editorial and publisher
%%%                        sites.
%%%
%%%                        Spelling has been verified with the UNIX
%%%                        spell and GNU ispell programs using the
%%%                        exception dictionary stored in the companion
%%%                        file with extension .sok.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project. A brief usage sketch of
%%%                        these tags follows the journal
%%%                        abbreviation string below.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order within each journal,
%%%                        using bibsort -bypages.
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility; a command
%%%                        sketch follows this header block.",
%%%  }
%%% ====================================================================
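%%% A minimal maintenance sketch, assuming that bibsort and Robert
%%% Solovay's checksum utility are on the search path (the command
%%% name ``checksum'' is an assumption; wc is standard).  The last
%%% three numbers of the checksum field above are the wc counts of
%%% lines, words, and characters:
%%%
%%%     bibsort -bypages < vldbj.bib > vldbj.sorted
%%%     checksum vldbj.bib    # CRC-16, then lines, words, characters
%%%     wc vldbj.bib          # lines, words, characters
%%% ====================================================================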
@Preamble{
    "\ifx \undefined \ocirc        \def \ocirc  #1{{\accent'27#1}}        \fi" #
    "\ifx \undefined \varvec       \def \varvec #1{\hbox{\boldmath $#1$}} \fi"
}
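
%%% The preamble guards above define \ocirc (a ring accent) and
%%% \varvec (a bold math vector) only when the styles in use have not
%%% already defined them, so entries that employ these macros typeset
%%% under any bibliography style.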

%%% ====================================================================
%%% Acknowledgement abbreviations:
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:
@String{j-VLDB-J = "VLDB Journal: Very Large Data Bases"}
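
%%% A minimal LaTeX usage sketch (the document text and file names are
%%% illustrative assumptions): entries are cited by their
%%% name:year:abbrev tags, and j-VLDB-J expands to the full journal
%%% name when BibTeX runs.
%%%
%%%     \documentclass{article}
%%%     \begin{document}
%%%     Multidatabase transaction management in the presence of
%%%     failures is treated in~\cite{Breitbart:1992:TMI}.
%%%     \bibliographystyle{plain}
%%%     \bibliography{vldbj}
%%%     \end{document}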

%%% ====================================================================
%%% Bibliography entries, sorted in publication order:
@Article{Breitbart:1992:TMI,
  author =       "Yuri Breitbart and Abraham Silberschatz and Glenn R.
                 Thompson",
  title =        "Transaction Management Issues in a Failure-Prone
                 Multidatabase System Environment",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "1",
  pages =        "1--39",
  month =        jul,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Breitbart:Yuri.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Thompson:Glenn_R=.html",
  abstract =     "This paper is concerned with the problem of
                 integrating a number of existing, off-the-shelf local
                 database systems into a multidatabase system that
                 maintains consistency in the face of concurrency and
                 failures. The major difficulties in designing such
                 systems stem from the requirements that local
                 transactions be allowed to execute outside the
                 multidatabase system control, and that the various
                 local database systems cannot participate in the
                 execution of a global commit protocol. A scheme based
                 on the assumption that the component local database
                 systems use the strict two-phase locking protocol is
                 developed. Two major problems are addressed: How to
                 ensure global transaction atomicity without the
                 provision of a commit protocol, and how to ensure
                 freedom from global deadlocks.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "algorithms; deadlock recovery; performance;
                 reliability; serializability; transaction log",
  xxauthor =     "Yuri Breitbart and Avi Silberschatz and Glenn R.
                 Thompson",
  xxpages =      "1--40",
}

@Article{Nodine:1992:CTH,
  author =       "Marian H. Nodine and Stanley B. Zdonik",
  title =        "Cooperative Transaction Hierarchies: Transaction
                 Support for Design Applications",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "1",
  pages =        "41--80",
  month =        jul,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Nodine:Marian_H=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zdonik:Stanley_B=.html",
  abstract =     "Traditional atomic and nested transactions are not
                 always well-suited to cooperative applications, such as
                 design applications. Cooperative applications place
                 requirements on the database that may conflict with the
                 serializability requirement. They require transactions
                 to be long, possibly nested, and able to interact with
                 each other in a structured way. We define a transaction
                 framework, called a {\em cooperative transaction
                 hierarchy}, that allows us to relax the requirement for
                 atomic, serializable transactions to better support
                 cooperative applications. In cooperative transaction
                 hierarchies, we allow the correctness specification for
                 groups of designers to be tailored to the needs of the
                 application. We use {\em patterns\/} and {\em
                 conflicts\/} to specify the constraints imposed on a
                 group's history for it to be correct. We also provide
                 some primitives to smooth the operation of the members.
                 We characterize deadlocks in a cooperative transaction
                 hierarchy, and provide mechanisms for deadlock
                 detection and resolution. We examine issues associated
                 with failure and recovery.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "cooperation; deadlock detection; design transactions;
                 non-serializability; transaction hierarchies;
                 transaction synchronization; version management",
}

@Article{Spaccapietra:1992:MIA,
  author =       "Stefano Spaccapietra and Christine Parent and Yann
                 Dupont",
  title =        "Model Independent Assertions for Integration of
                 Heterogeneous Schemas",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "1",
  pages =        "81--126",
  month =        jul,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dupont:Yann.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Parent:Christine.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Spaccapietra:Stefano.html",
  abstract =     "Due to the proliferation of database applications, the
                 integration of existing databases into a distributed or
                 federated system is one of the major challenges in
                 responding to enterprises' information requirements.
                 Some proposed integration techniques aim at providing
                 database administrators (DBAs) with a view definition
                 language they can use to build the desired integrated
                 schema. These techniques leave to the DBA the
                 responsibility of appropriately restructuring schema
                 elements from existing local schemas and of solving
                 inter-schema conflicts. This paper investigates the
                 {\em assertion-based\/} approach, in which the DBA's
                 action is limited to pointing out corresponding
                 elements in the schemas and to defining the nature of
                 the correspondence in between. This methodology is
                 capable of: ensuring better integration by taking into
                 account additional semantic information (assertions
                 about links); automatically solving structural
                 conflicts; building the integrated schema without
                 requiring conforming of initial schemas; applying
                 integration rules to a variety of data models; and
                 performing view as well as database integration. This
                 paper presents the basic ideas underlying our approach
                 and focuses on resolution of structural conflicts.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "conceptual modeling; database design and integration;
                 distributed databases; federated databases;
                 heterogeneous databases; schema integration",
}

@Article{Hsiao:1992:FDSa,
  author =       "David K. Hsiao",
  title =        "Federated Databases and Systems: {Part I} --- a
                 Tutorial on Their Data Sharing",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "1",
  pages =        "127--179",
  month =        jul,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hsiao:David_K=.html",
  abstract =     "The issues and solutions for the interoperability of a
                 class of heterogeneous databases and their database
                 systems are expounded in two parts. Part I presents the
                 data-sharing issues in federated databases and systems.
                 Part II, which will appear in a future issue, explores
                 resource-consolidation issues. {\em Interoperability\/}
                 in this context refers to data sharing among
                 heterogeneous databases, and to resource consolidation
                 of computer hardware, system software, and support
                 personnel. {\em Resource consolidation\/} requires the
                 presence of a database system architecture which
                 supports the heterogeneous system software, thereby
                 eliminating the need for various computer hardware and
                 support personnel. The class of heterogeneous databases
                 and database systems expounded herein is termed {\em
                 federated}, meaning that they are joined in order to
                 meet certain organizational requirements and because
                 they require their respective application
                 specificities, integrity constraints, and security
                 requirements to be upheld. Federated databases and
                 systems are new. While there are no technological
                 solutions, there has been considerable research towards
                 their development. This tutorial is aimed at exposing
                 the need for such solutions. A taxonomy is introduced
                 in our review of existing research undertakings and
                 exploratory developments. With this taxonomy, we
                 contrast and compare various approaches to federating
                 databases and systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "attribute-based;
                 data-model-and-language-to-data-model-and-language
                 mappings; database conversion; hierarchical; network;
                 object-oriented; relational; schema transformation;
                 transaction translation",
  xxpages =      "127--180",
}

@Article{Breitbart:1992:OMT,
  author =       "Yuri Breitbart and Hector Garcia-Molina and Abraham
                 Silberschatz",
  title =        "Overview of Multidatabase Transaction Management",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "2",
  pages =        "181--240",
  month =        oct,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Breitbart:Yuri.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garcia=Molina:Hector.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html",
  abstract =     "A multidatabase system (MDBS) is a facility that
                 allows users access to data located in multiple
                 autonomous database management systems (DBMSs). In such
                 a system, {\em global transactions\/} are executed
                 under the control of the MDBS. Independently, {\em
                 local transactions\/} are executed under the control of
                 the local DBMSs. Each local DBMS integrated by the MDBS
                 may employ a different transaction management scheme.
                 In addition, each local DBMS has complete control over
                 all transactions (global and local) executing at its
                 site, including the ability to abort at any point any
                 of the transactions executing at its site. Typically,
                 no design or internal DBMS structure changes are
                 allowed in order to accommodate the MDBS. Furthermore,
                 the local DBMSs may not be aware of each other and, as
                 a consequence, cannot coordinate their actions. Thus,
                 traditional techniques for ensuring transaction
                 atomicity and consistency in homogeneous distributed
                 database systems may not be appropriate for an MDBS
                 environment. The objective of this article is to
                 provide a brief review of the most current work in the
                 area of multidatabase transaction management. We first
                 define the problem and argue that the multidatabase
                 research will become increasingly important in the
                 coming years. We then outline basic research issues in
                 multidatabase transaction management and review recent
                 results in the area. We conclude with a discussion of
                 open problems and practical implications of this
                 research.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "multidatabase; recovery; reliability; serializability;
                 transaction; two-level serializability",
  xxauthor =     "Yuri Breitbart and Hector Garcia-Molina and Avi
                 Silberschatz",
}

@Article{Drew:1992:TII,
  author =       "Pamela Drew and Roger King and Dennis Heimbigner",
  title =        "A Toolkit for the Incremental Implementation of
                 Heterogeneous Database Management Systems",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "2",
  pages =        "241--284",
  month =        oct,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Drew:Pamela.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Heimbigner:Dennis.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/King:Roger.html",
  abstract =     "The integration of heterogeneous database environments
                 is a difficult and complex task. The A la carte
                 Framework addresses this complexity by providing a
                 reusable and extensible architecture in which a set of
                 heterogeneous database management systems can be
                 integrated. The goal is to support incremental
                 integration of existing database facilities into
                 heterogeneous, interoperative, distributed systems. The
                 Framework addresses the three main issues in
                 heterogeneous systems integration. First, it identifies
                 the problems in integrating heterogeneous systems.
                 Second, it identifies the key interfaces and parameters
                 required for autonomous systems to interoperate
                 correctly. Third, it demonstrates an approach to
                 integrating these interfaces in an extensible and
                 incremental way. The A la carte Framework provides a
                 set of reusable, integrating components which integrate
                 the major functional domains, such as transaction
                 management, that could or should be integrated in
                 heterogeneous systems. It also provides a mechanism for
                 capturing key characteristics of the components and
                 constraints which describe how the components can be
                 mixed and interchanged, thereby helping to reduce the
                 complexity of the integration process. Using this
                 framework, we have implemented an experimental,
                 heterogeneous configuration as part of the object
                 management work in the software engineering research
                 consortium, Arcadia.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database toolkits; extensible databases; heterogeneous
                 databases; heterogeneous transaction management;
                 incremental integration; open architectures;
                 reconfigurable architectures",
}

@Article{Hsiao:1992:FDSb,
  author =       "David K. Hsiao",
  title =        "Federated Databases and Systems: {Part II} --- a
                 Tutorial on Their Resource Consolidation",
  journal =      j-VLDB-J,
  volume =       "1",
  number =       "2",
  pages =        "285--310",
  month =        oct,
  year =         "1992",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:23 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb1.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hsiao:David_K=.html",
  abstract =     "The issues and solutions for the interoperability of a
                 class of heterogeneous databases and their database
                 systems are expounded in two parts. Part I presented
                 the data-sharing issues in federated databases and
                 systems (Hsiao, 1992). The present article explores
                 resource-consolidation issues. {\em Interoperability\/}
                 in this context refers to data sharing among
                 heterogeneous databases, and to resource consolidation
                 of computer hardware, system software, and support
                 personnel. {\em Resource consolidation\/} requires the
                 presence of a database system architecture which
                 supports the heterogeneous system software, thereby
                 eliminating the need for various computer hardware and
                 support personnel. The class of heterogeneous databases
                 and database systems expounded herein is termed {\em
                 federated}, meaning that they are joined in order to
                 meet certain organizational requirements and because
                 they require their respective application
                 specificities, integrity constraints, and security
                 requirements to be upheld. Federated databases and
                 systems are new. While there are no technological
                 solutions, there has been considerable research towards
                 their development. This tutorial is aimed at exposing
                 the need for such solutions. A taxonomy is introduced
                 in our review of existing research undertakings and
                 exploratory developments. With this taxonomy, we
                 contrast and compare various approaches to federating
                 databases and systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "attribute-based;
                 data-model-and-language-to-data-model-and-language
                 mappings; database conversion; hierarchical; network;
                 object-oriented; relational; schema transformation;
                 transaction translation",
}

@Article{Yu:1993:BMB,
  author =       "Philip S. Yu and Douglas W. Cornell",
  title =        "Buffer Management Based on Return on Consumption in a
                 Multi-Query Environment",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "1",
  pages =        "1--37",
  month =        jan,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:24 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cornell:Douglas_W=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Philip_S=.html",
  abstract =     "In a multi-query environment, the marginal utilities
                 of allocating additional buffer to the various queries
                 can be vastly different. The conventional approach
                 examines each query in isolation to determine the
                 optimal access plan and the corresponding locality set.
                 This can lead to performance that is far from optimal.
                 As each query can have different access plans with
                 dissimilar locality sets and sensitivities to memory
                 requirement, we employ the concepts of memory
                 consumption and return on consumption (ROC) as the
                 basis for memory allocations. Memory consumption of a
                 query is its space-time product, while ROC is a measure
                 of the effectiveness of response-time reduction through
                 additional memory consumption. A global optimization
                 strategy using simulated annealing is developed, which
                 minimizes the average response time over all queries
                 under
                 the constraint that the total memory consumption rate
                 has to be less than the buffer size. It selects the
                 optimal join method and memory allocation for all query
                 types simultaneously. By analyzing the way the optimal
                 strategy makes memory allocations, a heuristic
                 threshold strategy is then proposed. The threshold
                 strategy is based on the concept of ROC. As the memory
                 consumption rate by all queries is limited by the
                 buffer size, the strategy tries to allocate the memory
                 so as to make sure that a certain level of ROC is
                 achieved. A simulation model is developed to
                 demonstrate that the heuristic strategy yields
                 performance that is very close to the optimal strategy
                 and is far superior to the conventional allocation
                 strategy.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "buffer management; join methods; query optimization;
                 queueing model; simulated annealing; simulation",
  xxpages =      "1--38",
}

@Article{Harder:1993:CCI,
  author =       "Theo H{\"a}rder and Kurt Rothermel",
  title =        "Concurrency Control Issues in Nested Transactions",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "1",
  pages =        "39--74",
  month =        jan,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:24 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/H=auml=rder:Theo.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Rothermel:Kurt.html",
  abstract =     "The concept of nested transactions offers more
                 decomposable execution units and finer-grained control
                 over concurrency and recovery than `flat' transactions.
                 Furthermore, it supports the decomposition of a `unit
                 of work' into subtasks and their appropriate
                 distribution in a computer system as a prerequisite of
                 intratransaction parallelism. However, to exploit its
                 full potential, suitable granules of concurrency
                 control as well as access modes for shared data are
                 necessary. In this article, we investigate various
                 issues of concurrency control for nested transactions.
                 First, the mechanisms for cooperation and communication
                 within nested transactions should not impede parallel
                 execution of transactions among parent and children or
                 among siblings. Therefore, a model for nested
                 transactions is proposed allowing for effective
                 exploitation of intra-transaction parallelism. Starting
                 with a set of basic locking rules, we introduce the
                 concept of `downward inheritance of locks' to make data
                 manipulated by a parent available to its children. To
                 support supervised and restricted access, this concept
                 is refined to `controlled downward inheritance.' The
                 initial concurrency control scheme was based on S-X
                 locks for `flat,' non-overlapping data objects. In
                 order to adjust this scheme for practical applications,
                 a set of concurrency control rules is derived for
                 generalized lock modes described by a compatibility
                 matrix. Also, these rules are combined with a
                 hierarchical locking scheme to improve selective access
                 to data granules of varying sizes. After having tied
                 together both types of hierarchies (transaction and
                 object), it can be shown how `controlled downward
                 inheritance' for hierarchical objects is achieved in
                 nested transactions. Finally, problems of deadlock
                 detection and resolution in nested transactions are
                 considered.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrency control; locking; nested transactions;
                 object hierarchies",
}

@Article{Jensen:1993:UDT,
  author =       "Christian S. Jensen and Leo Mark and Nick Roussopoulos
                 and Timos K. Sellis",
  title =        "Using Differential Techniques to Efficiently Support
                 Transaction Time",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "1",
  pages =        "75--116",
  month =        jan,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:24 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jensen:Christian_S=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mark:Leo.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Roussopoulos:Nick.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html",
  abstract =     "We present an architecture for query processing in the
                 relational model extended with transaction time. The
                 architecture integrates standard query optimization and
                 computation techniques with new differential
                 computation techniques. Differential computation
                 computes a query incrementally or decrementally from
                 the cached and indexed results of previous
                 computations. The use of differential computation
                 techniques is essential in order to provide efficient
                 processing of queries that access very large temporal
                 relations. Alternative query plans are integrated into
                 a state transition network, where the state space
                 includes backlogs of base relations, cached results
                 from previous computations, a cache index, and
                 intermediate results; the transitions include standard
                 relational algebra operators, operators for
                 constructing differential files, operators for
                 differential computation, and combined operators. A
                 rule set is presented to prune away parts of state
                 transition networks that are not promising, and dynamic
                 programming techniques are used to identify the optimal
                 plans from the remaining state transition networks. An
                 extended logical access path serves as a `structuring'
                 index on the cached results and contains, in addition,
                 vital statistics for the query optimization process
                 (including statistics about base relations, backlogs,
                 and queries---previously computed and cached,
                 previously computed, or just previously estimated).",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "efficient query processing; incremental and
                 decremental computation; temporal databases;
                 transaction time",
}

@Article{Haritsa:1993:VBS,
  author =       "Jayant R. Haritsa and Michael J. Carey and Miron
                 Livny",
  title =        "Value-Based Scheduling in Real-Time Database Systems",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "2",
  pages =        "117--152",
  month =        apr,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:25 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Carey:Michael_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Haritsa:Jayant_R=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Livny:Miron.html",
  abstract =     "In a real-time database system, an application may
                 assign a {\em value\/} to a transaction to reflect the
                 return it expects to receive if the transaction commits
                 before its deadline. Most research on real-time
                 database systems has focused on systems where all
                 transactions are assigned the same value, the
                 performance goal being to minimize the number of missed
                 deadlines. When transactions are assigned different
                 values, the goal of the system shifts to maximizing the
                 sum of the values of those transactions that commit by
                 their deadlines. Minimizing the number of missed
                 deadlines becomes a secondary concern. In this article,
                 we address the problem of establishing a priority
                 ordering among transactions characterized by both
                 values and deadlines that results in maximizing the
                 realized value. Of particular interest is the tradeoff
                 established between these values and deadlines in
                 constructing the priority ordering. Using a detailed
                 simulation model, we evaluate the performance of
                 several priority mappings that make this tradeoff in
                 different, but fixed, ways. In addition, a `bucket'
                 priority mechanism that allows the relative importance
                 of values and deadlines to be controlled is introduced
                 and studied. The notion of associating a penalty with
                 transactions whose deadlines are not met is also
                 briefly considered.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "priority and concurrency algorithms; priority mapping;
                 resource and data contention; transaction values and
                 deadlines",
}

@Article{Grant:1993:QLR,
  author =       "John Grant and Witold Litwin and Nick Roussopoulos and
                 Timos K. Sellis",
  title =        "Query Languages for Relational Multidatabases",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "2",
  pages =        "153--171",
  month =        apr,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:25 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Grant:John.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Litwin:Witold.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Roussopoulos:Nick.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html",
  abstract =     "With the existence of many autonomous databases widely
                 accessible through computer networks, users will
                 require the capability to jointly manipulate data in
                 different databases. A multidatabase system provides
                 such a capability through a multidatabase manipulation
                 language, such as MSQL. We propose a theoretical
                 foundation for such languages by presenting a
                 multirelational algebra and calculus based on the
                 relational algebra and calculus. The proposal is
                 illustrated by various queries on an example
                 multidatabase. It is shown that properties of the
                 multirelational algebra may be used for optimization
                 and that every multirelational algebra query can be
                 expressed as a multirelational calculus query. The
                 connection between the multirelational languages and
                 MSQL, the multidatabase version of SQL, is also
                 investigated.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "multidatabase; multirelational algebra;
                 multirelational calculus; query optimization",
  xxpages =      "153--172",
}

@Article{Neufeld:1993:GCT,
  author =       "Andrea Neufeld and Guido Moerkotte and Peter C.
                 Lockemann",
  title =        "Generating Consistent Test Data for a Variable Set of
                 General Consistency Constraints",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "2",
  pages =        "173--213",
  month =        apr,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:25 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lockemann:Peter_C=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Moerkotte:Guido.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Neufeld:Andrea.html",
  abstract =     "To address the problem of generating test data for a
                 set of general consistency constraints, we propose a
                 new two-step approach: First the interdependencies
                 between consistency constraints are explored and a
                 generator formula is derived on their basis. During its
                 creation, the user may exert control. In essence, the
                 generator formula contains information to restrict the
                 search for consistent test databases. In the second
                 step, the test database is generated. Here, two
                 different approaches are proposed. The first adapts an
                 already published approach to generating finite models
                 by enhancing it with requirements imposed by test data
                 generation. The second, a new approach, operationalizes
                 the generator formula by translating it into a sequence
                 of operators, and then executes it to construct the
                 test database. For this purpose, we introduce two
                 powerful operators: the generation operator and the
                 test-and-repair operator. This approach also allows for
                 enhancing the generation operators with heuristics for
                 generating facts in a goal-directed fashion. It avoids
                 the generation of test data that may contradict the
                 consistency constraints, and limits the search space
                 for the test data. This article concludes with a
                 careful evaluation and comparison of the performance of
                 the two approaches and their variants by describing a
                 number of benchmarks and their results.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "consistency; design; logic; test data; validation",
  xxpages =      "173--214",
  xxtitle =      "Generating consistent test data: restricting the
                 search space by a generator formula",
}

@Article{Du:1993:SCU,
  author =       "Weimin Du and Ahmed K. Elmagarmid and Won Kim and
                 Omran A. Bukhres",
  title =        "Supporting Consistent Updates in Replicated
                 Multidatabase Systems",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "2",
  pages =        "215--241",
  month =        apr,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:25 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Bukhres:Omran_A=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Du:Weimin.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Elmagarmid:Ahmed_K=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kim:Won.html",
  abstract =     "Replication is useful in multidatabase systems (MDBSs)
                 because, as in traditional distributed database
                 systems, it increases data availability in the presence
                 of failures and decreases data retrieval costs by
                 reading local or close copies of data. Concurrency
                 control, however, is more difficult in replicated MDBSs
                 than in ordinary distributed database systems. This is
                 the case not only because local concurrency controllers
                 may schedule global transactions inconsistently, but
                 also because local transactions (at different sites)
                 may access the same replicated data. In this article,
                 we propose a decentralized concurrency control protocol
                 for a replicated MDBS. The proposed strategy supports
                 prompt and consistent updates of replicated data by
                 both local and global applications without a central
                 coordinator.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrency control; multidatabases; replica control;
                 replicated data management; resolvable conflicts;
                 serializability",
}

@Article{Anonymous:1993:Ca,
  author =       "Anonymous",
  title =        "Column",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "2",
  pages =        "??--??",
  month =        apr,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:25 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Anonymous:1993:Cb,
  author =       "Anonymous",
  title =        "Column",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "2",
  pages =        "??--??",
  month =        apr,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:25 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tomasic:1993:SIP,
  author =       "Anthony Tomasic and Hector Garcia-Molina",
  title =        "Special Issue in Parallelism in Database Systems:
                 Query Processing and Inverted Indices in Shared-Nothing
                 Document Information Retrieval Systems",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "3",
  pages =        "243--275",
  month =        jul,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 27 08:46:01 MDT 2000",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garcia=Molina:Hector.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tomasic:Anthony.html",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tomasic:1993:QPI,
  author =       "Anthony Tomasic and Hector Garcia-Molina",
  title =        "Query processing and inverted indices in
                 shared-nothing text document information retrieval
                 systems",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "3",
  pages =        "243--276",
  month =        jul,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:26 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The performance of distributed text document retrieval
                 systems is strongly influenced by the organization of
                 the inverted text. This article compares the
                 performance impact on query processing of various
                 physical organizations for inverted lists. We present a
                 new probabilistic model of the database and queries.
                 Simulation experiments determine those variables that
                 most strongly influence response time and throughput.
                 This leads to a set of design trade-offs over a wide
                 range of hardware configurations and new parallel query
                 processing strategies.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "file organization; full text information retrieval;
                 inverted file; inverted index; performance; query
                 processing; shared-nothing; striping",
}

@Article{Ziane:1993:PQP,
  author =       "Mikal Ziane and Mohamed Za{\"\i}t and Pascale
                 Borla-Salamet",
  title =        "Parallel Query Processing with Zigzag Trees",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "3",
  pages =        "277--301",
  month =        jul,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:26 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Borla=Salamet:Pascale.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Za=iuml=t:Mohamed.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Ziane:Mikal.html",
  abstract =     "In this article, we describe our approach to the
                 compile-time optimization and parallelization of
                 queries for execution in DBS3 or EDS. DBS3 is a
                 shared-memory parallel database system, while the EDS
                 system has a distributed-memory architecture. Because
                 DBS3 implements a parallel dataflow execution model,
                 this approach applies to both architectures. Using
                 randomized search strategies enables the exploration of
                 a search space large enough to include zigzag trees,
                 which are intermediate between left-deep and right-deep
                 trees. Zigzag trees are shown to provide better
                 response time than right-deep trees in case of limited
                 memory. Performance measurements obtained using the
                 DBS3 prototype show the advantages of zigzag trees
                 under various conditions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "cost function; fragmentation; pipeline; search space",
  xxpages =      "277--302",
}

@Article{Hua:1993:CDS,
  author =       "Kien A. Hua and Yu-lung Lo and Honesty C. Young",
  title =        "Considering Data Skew Factor in Multi-Way Join Query
                 Optimization for Parallel Execution",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "3",
  pages =        "303--330",
  month =        jul,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:26 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hua:Kien_A=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lo:Yu=lung.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Young:Honesty_C=.html",
  abstract =     "A consensus on parallel architecture for very large
                 database management has emerged. This architecture is
                 based on a shared-nothing hardware organization. The
                 computation model is very sensitive to skew in tuple
                 distribution, however. Recently, several parallel join
                 algorithms with dynamic load balancing capabilities
                 have been proposed to address this issue, but none of
                 them consider multi-way join problems. In this article
                 we propose a dynamic load balancing technique for
                 multi-way joins, and investigate the effect of load
                 balancing on query optimization. In particular, we
                 present a join-ordering strategy that takes
                 load-balancing issues into consideration. Our
                 performance study indicates that the proposed query
                 optimization technique can provide very impressive
                 performance improvement over conventional approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "load balancing; multi-way join; parallel-database
                 computer; query optimization",
  xxauthor =     "Kien A. Hua and Yo Lung Lo and Honesty C. Young",
}
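
%%% The skew sensitivity described above is easy to reproduce: hash
%%% partitioning sends every tuple with the same join-key value to
%%% the same processor, so a popular value unbalances the work.  A
%%% small sketch (plain Python, illustrative only; the key values
%%% are made up):
%%%
%%%     from collections import Counter
%%%
%%%     def partition_sizes(keys, n):
%%%         """Tuples per partition under hash partitioning into n."""
%%%         c = Counter(hash(k) % n for k in keys)
%%%         return [c.get(i, 0) for i in range(n)]
%%%
%%%     keys = ["a"] * 90 + list("bcdefghijk")   # one hot key value
%%%     sizes = partition_sizes(keys, 4)
%%%     # skew factor: largest partition versus the average load
%%%     print(sizes, max(sizes) / (sum(sizes) / 4))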

@Article{Zhang:1993:TGC,
  author =       "Aidong Zhang and Ahmed K. Elmagarmid",
  title =        "A Theory of Global Concurrency Control in
                 Multidatabase Systems",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "3",
  pages =        "331--360",
  month =        jul,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:26 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Elmagarmid:Ahmed_K=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zhang:Aidong.html",
  abstract =     "This article presents a theoretical basis for global
                 concurrency control to maintain global serializability
                 in multidatabase systems. Three correctness criteria
                 are formulated that utilize the intrinsic
                 characteristics of global transactions to determine the
                 serialization order of global subtransactions at each
                 local site. In particular, two new types of
                 serializability, chain-conflicting serializability and
                 sharing serializability, are proposed and hybrid
                 serializability, which combines these two basic
                 criteria, is discussed. These criteria offer the
                 advantage of imposing no restrictions on local sites
                 other than local serializability while retaining global
                 serializability. The graph testing techniques of the
                 three criteria are provided as guidance for global
                 transaction scheduling. In addition, an optimal
                 property of global transactions for determining the
                 serialization order of global subtransactions at local
                 sites is formulated. This property defines the upper
                 limit on global serializability in multidatabase
                 systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "chain-conflicting serializability; hybrid
                 serializability; optimality; sharing serializability",
}
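
%%% The graph tests mentioned above all reduce to checking that the
%%% serialization orders observed at the local sites form an acyclic
%%% relation over the global transactions.  A generic sketch (plain
%%% Python, not the paper's specific algorithms; the edge pairs are
%%% hypothetical):
%%%
%%%     from collections import defaultdict
%%%
%%%     def acyclic(edges):
%%%         """DFS cycle test; edges are (before, after) pairs."""
%%%         g = defaultdict(list)
%%%         for u, v in edges:
%%%             g[u].append(v)
%%%         WHITE, GRAY, BLACK = 0, 1, 2
%%%         color = defaultdict(int)
%%%         def dfs(u):
%%%             color[u] = GRAY
%%%             for v in g[u]:
%%%                 if color[v] == GRAY or (color[v] == WHITE and not dfs(v)):
%%%                     return False
%%%             color[u] = BLACK
%%%             return True
%%%         return all(dfs(u) for u in list(g) if color[u] == WHITE)
%%%
%%%     # T1 before T2 at one site, T2 before T1 at another:
%%%     # not globally serializable.
%%%     print(acyclic([("T1", "T2"), ("T2", "T1")]))   # False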

@Article{Anonymous:1993:SIP,
  author =       "Anonymous",
  title =        "Special issue on parallelism in database systems",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "3",
  pages =        "??--??",
  month =        jul,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:26 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Srinivasan:1993:PBT,
  author =       "V. Srinivasan and Michael J. Carey",
  title =        "Performance of {B$^+$} tree concurrency control
                 algorithms",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "4",
  pages =        "361--406",
  month =        oct,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:27 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Carey:Michael_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Srinivasan:V=.html",
  abstract =     "A number of algorithms have been proposed to access
                 B$^+$-trees concurrently, but they are not well
                 understood. In this article, we study the performance
                 of various B$^+$-tree concurrency control algorithms
                 using a detailed simulation model of B$^+$-tree
                 operations in a centralized DBMS. Our study covers a
                 wide range of data contention situations and resource
                 conditions. In addition, based on the performance of
                 the set of B$^+$-tree concurrency control algorithms,
                 which includes one new algorithm, we make projections
                 regarding the performance of other algorithms in the
                 literature. Our results indicate that algorithms with
                 updaters that lock-couple using exclusive locks perform
                 poorly as compared to those that permit more optimistic
                 index descents. In particular, the B-link algorithms
                 are seen to provide the most concurrency and the best
                 overall performance. Finally, we demonstrate the need
                 for a highly concurrent long-term lock holding strategy
                 to obtain the full benefits of a highly concurrent
                 algorithm for index operations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "B+-tree structures; data contention; lock modes;
                 performance; resource conditions; simulation models;
                 workload parameters",
  xxtitle =      "Performance of {B+} Tree Concurrency Algorithms",
}
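
%%% Lock-coupling, the pessimistic descent the study finds costly
%%% for updaters, holds each parent lock just until the child's
%%% lock is granted.  A minimal sketch (plain Python with one lock
%%% per node, illustrative only; splits and B-link chains are
%%% ignored):
%%%
%%%     import threading
%%%
%%%     class Node:
%%%         def __init__(self, keys, children=None):
%%%             self.lock = threading.Lock()
%%%             self.keys = keys            # sorted separator keys
%%%             self.children = children    # None marks a leaf
%%%
%%%     def descend(root, key):
%%%         node = root
%%%         node.lock.acquire()
%%%         while node.children is not None:
%%%             child = node.children[sum(k <= key for k in node.keys)]
%%%             child.lock.acquire()        # couple: child locked first,
%%%             node.lock.release()         # then the parent released
%%%             node = child
%%%         return node                     # leaf returned still locked
%%%
%%%     leaf = descend(Node([10], [Node([1, 5]), Node([10, 12])]), 12)
%%%     print(leaf.keys); leaf.lock.release()   # [10, 12]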

@Article{Weikum:1993:MLT,
  author =       "Gerhard Weikum and Christof Hasse",
  title =        "Multi-Level Transaction Management for Complex
                 Objects: Implementation, Performance, Parallelism",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "4",
  pages =        "407--453",
  month =        oct,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:27 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hasse:Christof.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Weikum:Gerhard.html",
  abstract =     "Multi-level transactions are a variant of open-nested
                 transactions in which the subtransactions correspond to
                 operations at different levels of a layered system
                 architecture. They allow the exploitation of semantics
                 of high-level operations to increase concurrency. As a
                 consequence, undoing a transaction requires
                 compensation of completed subtransactions. In addition,
                 multi-level recovery methods must take into
                 consideration that high-level operations are not
                 necessarily atomic if multiple pages are updated in a
                 single subtransaction. This article presents algorithms
                 for multi-level transaction management that are
                 implemented in the database kernel system (DASDBS). In
                 particular, we show that multi-level recovery can be
                 implemented in an efficient way. We discuss performance
                 measurements using a synthetic benchmark for processing
                 complex objects in a multi-user environment. We show
                 that multi-level transaction management can be extended
                 easily to cope with parallel subtransactions within a
                 single transaction. Performance results are presented
                 with varying degrees of inter- and intratransaction
                 parallelism.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "atomicity; complex objects; inter- and
                 intratransaction parallelism; multi-level transactions;
                 performance; persistence; recovery",
  xxpages =      "407--454",
}
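
%%% Because subtransactions commit early, undoing a multi-level
%%% transaction means running a compensating operation for each
%%% completed subtransaction, in reverse order.  A toy sketch
%%% (plain Python, not the DASDBS implementation; the counter
%%% example is made up):
%%%
%%%     class MultiLevelTxn:
%%%         def __init__(self):
%%%             self.compensation_log = []
%%%         def do(self, op, undo):
%%%             op()                        # subtransaction commits here
%%%             self.compensation_log.append(undo)
%%%         def abort(self):
%%%             while self.compensation_log:
%%%                 self.compensation_log.pop()()   # LIFO compensation
%%%
%%%     state = {"n": 0}
%%%     t = MultiLevelTxn()
%%%     t.do(lambda: state.update(n=state["n"] + 1),
%%%          lambda: state.update(n=state["n"] - 1))
%%%     t.abort()
%%%     print(state["n"])   # 0: the increment was compensated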

@Article{Storey:1993:USR,
  author =       "Veda C. Storey",
  title =        "Understanding Semantic Relationships",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "4",
  pages =        "455--488",
  month =        oct,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:27 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Storey:Veda_C=.html",
  abstract =     "To develop sophisticated database management systems,
                 there is a need to incorporate more understanding of
                 the real world in the information that is stored in a
                 database. Semantic data models have been developed to
                 try to capture some of the meaning, as well as the
                 structure, of data using abstractions such as
                 inclusion, aggregation, and association. Besides these
                 well-known relationships, a number of additional
                 semantic relationships have been identified by
                 researchers in other disciplines such as linguistics,
                 logic, and cognitive psychology. This article explores
                 some of the lesser-recognized semantic relationships
                 and discusses both how they could be captured, either
                 manually or by using an automated tool, and their
                 impact on database design. To demonstrate the
                 feasibility of this research, a prototype system for
                 analyzing semantic relationships, called the Semantic
                 Relationship Analyzer, is presented.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database design; database design systems;
                 entity-relationship model; relational model; semantic
                 relationships",
}

@Article{Tseng:1993:SMS,
  author =       "Frank Shou-Cheng Tseng and Arbee L. P. Chen and W.-P.
                 Yang",
  title =        "Searching a Minimal Semantically-Equivalent Subset of
                 a Set of Partial Values",
  journal =      j-VLDB-J,
  volume =       "2",
  number =       "4",
  pages =        "489--512",
  month =        oct,
  year =         "1993",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:27 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb2.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chen:Arbee_L=_P=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tseng:Frank_Shou=Cheng.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yang:W==P=.html",
  abstract =     "Imprecise data exist in databases due to their
                 unavailability or to data/schema incompatibilities in a
                 multidatabase system. Partial values have been used to
                 represent imprecise data. Manipulation of partial
                 values is therefore necessary to process queries
                 involving imprecise data. In this article, we study the
                 problem of eliminating redundant partial values that
                 result from a projection on an attribute with partial
                 values. The redundancy of partial values is defined
                 through the interpretation of a set of partial values.
                 This problem is equivalent to searching a minimal
                 semantically-equivalent subset of a set of partial
                 values. A semantically-equivalent subset contains
                 exactly the same information as the original set. We
                 derive a set of useful properties and apply a graph
                 matching technique to develop an efficient algorithm
                 for searching such a minimal subset and therefore
                 eliminating redundant partial values. By this process,
                 we not only provide a concise answer to the user, but
                 also reduce the communication cost when partial values
                 are requested to be transmitted from one site to
                 another site in a distributed environment. Moreover,
                 further manipulation of the partial values can be
                 simplified. This work is also extended to the case of
                 multi-attribute projections.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "bipartite graph; graph matching; imprecise data;
                 minimal elements; multidatabase systems; partial
                 values",
  xxauthor =     "Frank S. C. Tseng and Arbee L. P. Chen and Wei Pang
                 Yang",
}

@Article{Georgakopoulos:1994:CST,
  author =       "Dimitrios Georgakopoulos and Marek Rusinkiewicz and
                 Witold Litwin",
  title =        "Chronological Scheduling of Transactions with Temporal
                 Dependencies",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "1",
  pages =        "1--28",
  month =        jan,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:28 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Georgakopoulos:Dimitrios.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Litwin:Witold.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Rusinkiewicz:Marek.html",
  abstract =     "Database applications often impose temporal
                 dependencies between transactions that must be
                 satisfied to preserve data consistency. The extant
                 correctness criteria used to schedule the execution of
                 concurrent transactions are either time independent or
                 use strict, difficult-to-satisfy real-time constraints.
                 On one end of the spectrum, serializability completely
                 ignores time. On the other end, deadline scheduling
                 approaches consider the outcome of each transaction
                 execution correct only if the transaction meets its
                 real-time deadline. In this article, we explore new
                 correctness criteria and scheduling methods that
                 capture temporal transaction dependencies and belong to
                 the broad area between these two extreme approaches. We
                 introduce the concepts of {\em succession dependency\/}
                 and {\em chronological dependency\/} and define
                 correctness criteria under which temporal dependencies
                 between transactions are preserved even if the
                 dependent transactions execute concurrently. We also
                 propose a {\em chronological scheduler\/} that can
                 guarantee that transaction executions satisfy their
                 chronological constraints. The advantages of
                 chronological scheduling over traditional scheduling
                 methods, as well as the main issues in the
                 implementation and performance of the proposed
                 scheduler, are discussed.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrent succession; execution correctness; partial
                 rollbacks; synchronization; transaction ordering",
}

@Article{Whang:1994:DMD,
  author =       "Kyu-Young Whang and Sang-Wook Kim and Gio
                 Wiederhold",
  title =        "Dynamic Maintenance of Data Distribution for
                 Selectivity Estimation",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "1",
  pages =        "29--51",
  month =        jan,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:28 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kim:Sang=Wook.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Whang:Kyu=Young.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Wiederhold:Gio.html",
  abstract =     "We propose a new dynamic method for multidimensional
                 selectivity estimation for range queries that works
                 accurately independently of data distribution. Good
                 estimation of selectivity is important for query
                 optimization and physical database design. Our method
                 employs the multilevel grid file (MLGF) for accurate
                 estimation of multidimensional data distribution. The
                 MLGF is a dynamic, hierarchical, balanced,
                 multidimensional file structure that gracefully adapts
                 to nonuniform and correlated distributions. We show
                 that the MLGF directory naturally represents a
                 multidimensional data distribution. We then extend it
                 for further refinement and present the selectivity
                 estimation method based on the MLGF. Extensive
                 experiments have been performed to test the accuracy of
                 selectivity estimation. The results show that
                 estimation errors are very small independently of
                 distributions, even with correlated and/or highly
                 skewed ones. Finally, we analyze the cause of errors in
                 estimation and investigate the effects of various
                 parameters on the accuracy of estimation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "multidimensional file structure; multilevel grid
                 files; physical database design; query optimization",
}
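
%%% The estimation idea can be pictured with an ordinary grid
%%% histogram: the selectivity of a range query is approximated by
%%% the fraction of points falling in grid cells the query overlaps.
%%% A simplified 2-D sketch (plain Python; the MLGF itself is an
%%% adaptive, hierarchical structure, and these counts are made up):
%%%
%%%     def estimate(counts, cell, query):
%%%         """counts[i][j]: points per cell; cell = (w, h);
%%%         query = (x1, y1, x2, y2); returns a fraction."""
%%%         w, h = cell
%%%         x1, y1, x2, y2 = query
%%%         total = sum(map(sum, counts))
%%%         hit = sum(counts[i][j]
%%%                   for i in range(len(counts))
%%%                   for j in range(len(counts[0]))
%%%                   if i * w < x2 and (i + 1) * w > x1
%%%                   and j * h < y2 and (j + 1) * h > y1)
%%%         return hit / total
%%%
%%%     counts = [[4, 0], [1, 5]]                        # 2 x 2 grid
%%%     print(estimate(counts, (10, 10), (0, 0, 9, 9)))  # 0.4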

@Article{Kamel:1994:PBO,
  author =       "Nabil Kamel and Ping Wu and Stanley Y. W. Su",
  title =        "A Pattern-Based Object Calculus",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "1",
  pages =        "53--76",
  month =        jan,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:28 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kamel:Nabil.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Su:Stanley_Y=_W=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Wu:Ping.html",
  abstract =     "Several object-oriented database management systems
                 have been implemented without an accompanying
                 theoretical foundation for constraint, query
                 specification, and processing. The pattern-based object
                 calculus presented in this article provides such a
                 theoretical foundation for describing and processing
                 object-oriented databases. We view an object-oriented
                 database as a network of interrelated classes (i.e.,
                 the intension) and a collection of time-varying object
                 association patterns (i.e., the extension). The object
                 calculus is based on first-order logic. It provides the
                 formalism for interpreting precisely and uniformly the
                 semantics of queries and integrity constraints in
                 object-oriented databases. The power of the object
                 calculus is shown in four aspects. First, associations
                 among objects are expressed explicitly in an
                 object-oriented database. Second, the `nonassociation'
                 operator is included in the object calculus. Third,
                 set-oriented operations can be performed on both
                 homogeneous and heterogeneous object association
                 patterns. Fourth, our approach does not assume a
                 specific form of database schema. A proposed formalism
                 is also applied to the design of high-level
                 object-oriented query and constraint languages.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "association patterns; Object-oriented databases; query
                 expressions; semantic constraints",
}

@Article{Sciore:1994:VCM,
  author =       "Edward Sciore",
  title =        "Versioning and Configuration Management in an
                 Object-Oriented Data Model",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "1",
  pages =        "77--106",
  month =        jan,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:28 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sciore:Edward.html",
  abstract =     "Many database applications require the storage and
                 manipulation of different versions of data objects. To
                 satisfy the diverse needs of these applications,
                 current database systems support versioning at a very
                 low level. This article demonstrates that
                 application-independent versioning can be supported at
                 a significantly higher level. In particular, we extend
                 the EXTRA data model and EXCESS query language so that
                 configurations can be specified conceptually and
                 non-procedurally. We also show how version sets can be
                 viewed multidimensionally, thereby allowing
                 configurations to be expressed at a higher level of
                 abstraction. The resulting model integrates and
                 generalizes ideas in CAD systems, CASE systems, and
                 temporal databases.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "EXTRA/EXCESS data models; generic and specific
                 references; query language; semantically based
                 configuration specifications",
}

@Article{Ramamohanarao:1994:IDD,
  author =       "Kotagiri Ramamohanarao and James Harland",
  title =        "An introduction to deductive database languages and
                 systems",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "2",
  pages =        "107--122",
  month =        apr,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:29 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Harland:James.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamohanarao:Kotagiri.html",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  xxtitle =      "Special Issue on Prototypes of Deductive Database
                 Systems: An Introduction to Deductive Database
                 Languages and Systems",
}

@Article{Derr:1994:GND,
  author =       "Marcia A. Derr and Shinichi Morishita and Geoffrey
                 Phipps",
  title =        "The Glue-Nail Deductive Database System: Design,
                 Implementation, and Evaluation",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "2",
  pages =        "123--160",
  month =        apr,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:29 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Derr:Marcia_A=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Morishita:Shinichi.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Phipps:Geoffrey.html",
  abstract =     "We describe the design and implementation of the
                 Glue-Nail deductive database system. Nail is a purely
                 declarative query language; Glue is a procedural
                 language used for non-query activities. The two
                 languages combined are sufficient to write a complete
                 application. Nail and Glue code are both compiled into
                 the target language IGlue. The Nail compiler uses
                 variants of the magic sets algorithm and supports
                 well-founded models. The Glue compiler's static
                 optimizer uses peephole techniques and data flow
                 analysis to improve code. The IGlue interpreter
                 features a run-time adaptive optimizer that reoptimizes
                 queries and automatically selects indexes. We also
                 describe the Glue-Nail benchmark suite, a set of
                 applications developed to evaluate the Glue-Nail
                 language and to measure the performance of the
                 system.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "language; performance; query optimization",
}

@Article{Ramakrishnan:1994:CDS,
  author =       "Raghu Ramakrishnan and Divesh Srivastava and S.
                 Sudarshan and Praveen Seshadri",
  title =        "The {CORAL} Deductive System",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "2",
  pages =        "161--210",
  month =        apr,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:29 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramakrishnan:Raghu.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Seshadri:Praveen.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Srivastava:Divesh.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sudarshan:S=.html",
  abstract =     "CORAL is a deductive system that supports a rich
                 declarative language, and an interface to C++, which
                 allows for a combination of declarative and imperative
                 programming. A CORAL declarative program can be
                 organized as a collection of interacting modules. CORAL
                 supports a wide range of evaluation strategies, and
                 automatically chooses an efficient strategy for each
                 module in the program. Users can guide query
                 optimization by selecting from a wide range of control
                 choices. The CORAL system provides imperative
                 constructs to update, insert, and delete facts. Users
                 can program in a combination of declarative CORAL and
                 C++ extended with CORAL primitives. A high degree of
                 extensibility is provided by allowing C++ programmers
                 to use the class structure of C++ to enhance the CORAL
                 implementation. CORAL provides support for main-memory
                 data and, using the EXODUS storage manager,
                 disk-resident data. We present a comprehensive view of
                 the system from broad design goals, the language, and
                 the architecture, to language interfaces and
                 implementation details.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "deductive database; logic programming system; query
                 language",
}

@Article{Kiessling:1994:DSE,
  author =       "Werner Kie{\ss}ling and Helmut Schmidt and Werner
                 Strau{\ss} and Gerhard D{\"u}nzinger",
  title =        "{DECLARE} and {SDS}: Early Efforts to Commercialize
                 Deductive Database Technology",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "2",
  pages =        "211--243",
  month =        apr,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:29 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/D=uuml=nzinger:Gerhard.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kie=szlig=ling:Werner.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Schmidt:Helmut.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Strau=szlig=:Werner.html",
  abstract =     "The Smart Data System (SDS) and its declarative query
                 language, Declarative Reasoning, represent the first
                 large-scale effort to commercialize deductive database
                 technology. SDS offers the functionality of deductive
                 reasoning in a distributed, heterogeneous database
                 environment. In this article we discuss several
                 interesting aspects of the query compilation and
                 optimization process. The emphasis is on the query
                 execution plan data structure and its transformations
                 by the optimizing rule compiler. Through detailed case
                 studies we demonstrate that efficient and very compact
                 runtime code can be generated. We also discuss our
                 experiences gained from a large pilot application (the
                 MVV-expert) and report on several issues of practical
                 interest in engineering such a complex system,
                 including the migration from Lisp to C. We argue that
                 heuristic knowledge and control should be made an
                 integral part of deductive databases.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "declarative reasoning; distributed query processing;
                 heuristic control; multi-databases; productization;
                 query optimizer",
}

@Article{Vaghani:1994:ADD,
  author =       "Jayen Vaghani and Kotagiri Ramamohanarao and David B.
                 Kemp and Zoltan Somogyi and Peter J. Stuckey and Tim
                 S. Leask and James Harland",
  title =        "The {Aditi} Deductive Database System",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "2",
  pages =        "245--288",
  month =        apr,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:29 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Harland:James.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemp:David_B=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Leask:Tim_S=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamohanarao:Kotagiri.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Somogyi:Zoltan.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Stuckey:Peter_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/v/Vaghani:Jayen.html",
  abstract =     "Deductive databases generalize relational databases by
                 providing support for recursive views and non-atomic
                 data. Aditi is a deductive system based on the
                 client-server model; it is inherently multi-user and
                 capable of exploiting parallelism on shared-memory
                 multiprocessors. The back-end uses relational
                 technology for efficiency in the management of
                 disk-based data and uses optimization algorithms
                 especially developed for the bottom-up evaluation of
                 logical queries involving recursion. The front-end
                 interacts with the user in a logical language that has
                 more expressive power than relational query languages.
                 We present the structure of Aditi, discuss its
                 components in some detail, and present performance
                 figures.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "implementation; logic; multi-user; parallelism;
                 relational database",
}
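
%%% Bottom-up evaluation, the strategy Aditi's back-end is built
%%% around, derives new facts from known ones until a fixpoint is
%%% reached.  A naive sketch for the classic recursive query
%%%     path(X,Y) :- edge(X,Y).
%%%     path(X,Y) :- path(X,Z), edge(Z,Y).
%%% (plain Python, illustrative only; real systems use semi-naive
%%% evaluation and magic sets):
%%%
%%%     def bottom_up(edge):
%%%         path = set(edge)
%%%         while True:
%%%             new = {(x, y) for (x, z) in path
%%%                           for (z2, y) in edge if z == z2}
%%%             if new <= path:
%%%                 return path              # fixpoint reached
%%%             path |= new
%%%
%%%     print(sorted(bottom_up({(1, 2), (2, 3)})))
%%%     # [(1, 2), (1, 3), (2, 3)]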

@Article{Anonymous:1994:SIP,
  author =       "Anonymous",
  title =        "Special issue on prototypes of deductive database
                 systems",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "2",
  pages =        "??--??",
  month =        apr,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lee:1994:EIV,
  author =       "Byung Suk Lee and Gio Wiederhold",
  title =        "Efficiently Instantiating View-Objects From Remote
                 Relational Databases",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "3",
  pages =        "289--323",
  month =        jul,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:30 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lee:Byung_Suk.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Wiederhold:Gio.html",
  abstract =     "View-objects are complex objects that are instantiated
                 by delivering a query to a database and converting the
                 query result into a nested structure. In relational
                 databases, query results are conventionally retrieved
                 as a single flat relation, which contains duplicate
                 subtuples in its composite tuples. These duplicate
                 subtuples increase the amount of data to be handled and
                 thus degrade performance. In this article, we describe
                 two new methods that retrieve a query result in
                 structures other than a single flat relation. One
                 method retrieves a set of relation fragments, and the
                 other retrieves a single nested relation. We first
                 describe their algorithms and cost models, and then
                 present the cost comparison results in a client-server
                 architecture with a relational main memory database
                 residing on a server.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "client server; complex object; nested relation; query
                 optimization; relation fragments",
}
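
%%% The duplicate subtuples described above arise because a flat
%%% join result repeats the parent tuple once per child; nesting
%%% groups the children under a single parent object.  A minimal
%%% sketch (plain Python; the dept/emp rows are hypothetical):
%%%
%%%     def nest(rows):
%%%         objs = {}
%%%         for dept, emp in rows:           # dept repeats per emp
%%%             objs.setdefault(dept, []).append(emp)
%%%         return objs
%%%
%%%     flat = [("Sales", "Ann"), ("Sales", "Bo"), ("R&D", "Cy")]
%%%     print(nest(flat))
%%%     # {'Sales': ['Ann', 'Bo'], 'R&D': ['Cy']}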

@Article{Barbara-Milla:1994:DPT,
  author =       "Daniel Barbar{\'a}-Mill{\'a} and Hector
                 Garcia-Molina",
  title =        "The demarcation protocol: a technique for maintaining
                 constraints in distributed database systems",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "3",
  pages =        "325--353",
  month =        jul,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:30 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Barbar=aacute=:Daniel.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garcia=Molina:Hector.html",
  abstract =     "Traditional protocols for distributed database
                 management have a high message overhead; restrain or
                 lock access to resources during protocol execution; and
                 may become impractical for some scenarios like
                 real-time systems and very large distributed databases.
                 In this article, we present the demarcation protocol;
                 it overcomes these problems by using explicit
                 consistency constraints as the correctness criteria.
                 The method establishes safe limits as `lines drawn in
                 the sand' for updates, and makes it possible to change
                 these limits dynamically, enforcing the constraints at
                 all times. We show how this technique can be applied to
                 linear arithmetic, existential, key, and approximate
                 copy constraints.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "consistency constraints; serializability; transaction
                 limits",
  xxauthor =     "Daniel Barbar{\'a} and Hector Garcia-Molina",
}
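
%%% The `lines drawn in the sand' can be pictured as local limits
%%% whose sum respects a global constraint such as x + y <= 100:
%%% each site updates freely inside its own limit and exchanges
%%% messages only to move a limit.  A toy sketch (plain Python, not
%%% the paper's protocol machinery; the values are made up):
%%%
%%%     class Site:
%%%         def __init__(self, value, limit):
%%%             self.value, self.limit = value, limit
%%%         def increment(self, d):
%%%             """Message-free while value + d stays within limit."""
%%%             if self.value + d > self.limit:
%%%                 return False      # must first obtain more slack
%%%             self.value += d
%%%             return True
%%%         def give_slack(self, d):
%%%             assert self.limit - d >= self.value
%%%             self.limit -= d       # peer may now raise its limit
%%%             return d
%%%
%%%     x, y = Site(10, 60), Site(20, 40)   # limits: 60 + 40 <= 100
%%%     print(x.increment(55))              # False: exceeds x's limit
%%%     x.limit += y.give_slack(10)         # renegotiate the limits
%%%     print(x.increment(55), x.value)     # True 65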

@Article{Bertino:1994:ICO,
  author =       "Elisa Bertino",
  title =        "Index Configuration in Object-Oriented Databases",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "3",
  pages =        "355--399",
  month =        jul,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:30 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Bertino:Elisa.html",
  abstract =     "In relational databases, an attribute of a relation
                 can have only a single primitive value, making it
                 cumbersome to model complex objects. The
                 object-oriented paradigm removes this difficulty by
                 introducing the notion of nested objects, which allows
                 the value of an object attribute to be another object
                 or a set of other objects. This means that a class
                 consists of a set of attributes, and the values of the
                 attributes are objects that belong to other classes;
                 that is, the definition of a class forms a hierarchy of
                 classes. All attributes of the nested classes are
                 nested attributes of the root of the hierarchy. A
                 branch of such hierarchy is called a {\em path}. In
                 this article, we address the problem of index
                 configuration for a given path. We first summarize some
                 basic concepts, and introduce the concept of index
                 configuration for a path. Then we present cost formulas
                 to evaluate the costs of the various configurations.
                 Finally, we present the algorithm that determines the
                 optimal configuration, and show its correctness.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "index selection; physical database design; query
                 optimization",
}
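
%%% An index on a path in this sense maps the value reached through
%%% a chain of nested attributes back to the root objects.  A tiny
%%% sketch (plain Python; the company/division/city objects are
%%% hypothetical, not from the article):
%%%
%%%     companies = [
%%%         {"name": "A", "division": {"city": "Turin"}},
%%%         {"name": "B", "division": {"city": "Milan"}},
%%%     ]
%%%     # index on the path company.division.city
%%%     path_index = {}
%%%     for c in companies:
%%%         path_index.setdefault(c["division"]["city"],
%%%                               []).append(c["name"])
%%%     print(path_index["Milan"])   # ['B']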

@Article{Guting:1994:ISD,
  author =       "Ralf Hartmut G{\"u}ting",
  title =        "An introduction to spatial database systems",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "4",
  pages =        "357--399",
  month =        oct,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:31 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/G=uuml=ting:Ralf_Hartmut.html",
  abstract =     "We propose a definition of a spatial database system
                 as a database system that offers spatial data types in
                 its data model and query language, and supports spatial
                 data types in its implementation, providing at least
                 spatial indexing and spatial join methods. Spatial
                 database systems offer the underlying database
                 technology for geographic information systems and other
                 applications. We survey data modeling, querying, data
                 structures and algorithms, and system architecture for
                 such systems. The emphasis is on describing known
                 technology in a coherent manner, rather than listing
                 open problems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  xxtitle =      "Special Issue on Spatial Database Systems: An
                 Introduction to Spatial Database Systems",
}

@Article{Baumann:1994:MMD,
  author =       "Peter Baumann",
  title =        "Management of Multidimensional Discrete Data",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "4",
  pages =        "401--444",
  month =        oct,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:31 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Baumann:Peter.html",
  abstract =     "Spatial database management involves two main
                 categories of data: vector and raster data. The former
                 has received a lot of in-depth investigation; the
                 latter still lacks a sound framework. Current DBMSs
                 either regard raster data as pure byte sequences where
                 the DBMS has no knowledge about the underlying
                 semantics, or they do not complement array structures
                 with storage mechanisms suitable for huge arrays, or
                 they are designed as specialized systems with
                 sophisticated imaging functionality, but no general
                 database capabilities (e.g., a query language). Many
                 types of array data will require database support in
                 the future, notably 2-D images, audio data and general
                 signal-time series (1-D), animations (3-D), static or
                 time-variant voxel fields (3-D and 4-D), and the
                 ISO/IEC PIKS (Programmer's Imaging Kernel System)
                 BasicImage type (5-D). In this article, we propose a
                 comprehensive support of {\em multidimensional discrete
                 data\/} (MDD) in databases, including operations on
                 arrays of arbitrary size over arbitrary data types. A
                 set of requirements is developed, a small set of
                 language constructs is proposed (based on a formal
                 algebraic semantics), and a novel MDD architecture is
                 outlined to provide the basis for efficient MDD query
                 evaluation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "image database systems; multimedia database systems;
                 spatial index; tiling",
}
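
%%% Storage for huge arrays typically follows the tiling idea named
%%% in the keywords: the array is cut into fixed-size tiles and a
%%% cell access touches only the tile containing it.  A minimal
%%% sketch (plain Python, illustrative only; the tile size is made
%%% up):
%%%
%%%     TILE = 4
%%%     tiles = {}          # (tile_i, tile_j) -> TILE x TILE block
%%%
%%%     def write(i, j, v):
%%%         t = tiles.setdefault((i // TILE, j // TILE),
%%%                              [[0.0] * TILE for _ in range(TILE)])
%%%         t[i % TILE][j % TILE] = v
%%%
%%%     def read(i, j):
%%%         t = tiles.get((i // TILE, j // TILE))
%%%         return 0.0 if t is None else t[i % TILE][j % TILE]
%%%
%%%     write(9, 2, 7.0)
%%%     print(read(9, 2), len(tiles))   # 7.0 1: one tile materialized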

@Article{Chu:1994:SMA,
  author =       "Wesley W. Chu and Ion Tim Ieong and Ricky K. Taira",
  title =        "A Semantic Modeling Approach for Image Retrieval by
                 Content",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "4",
  pages =        "445--477",
  month =        oct,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:31 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chu:Wesley_W=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Ieong:Ion_Tim.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Taira:Ricky_K=.html",
  abstract =     "We introduce a semantic data model to capture the
                 hierarchical, spatial, temporal, and evolutionary
                 semantics of images in pictorial databases. This model
                 mimics the user's conceptual view of the image content,
                 providing the framework and guidelines for
                 preprocessing to extract image features. Based on the
                 model constructs, a spatial evolutionary query language
                 (SEQL), which provides direct image object manipulation
                 capabilities, is presented. With semantic information
                 captured in the model, spatial evolutionary queries are
                 answered efficiently. Using an object-oriented
                 platform, a prototype medical-image management system
                 was implemented at UCLA to demonstrate the feasibility
                 of the proposed approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "image; medical; multimedia databases; spatial query
                 processing; temporal evolutionary query processing",
}

@Article{Papadias:1994:QRS,
  author =       "Dimitris Papadias and Timos K. Sellis",
  title =        "Qualitative Representation of Spatial Knowledge in
                 Two-Dimensional Space",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "4",
  pages =        "479--516",
  month =        oct,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:31 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Papadias:Dimitris.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html",
  abstract =     "Various relation-based systems, concerned with the
                 qualitative representation and processing of spatial
                 knowledge, have been developed in numerous application
                 domains. In this article, we identify the common
                 concepts underlying qualitative spatial knowledge
                 representation, we compare the representational
                 properties of the different systems, and we outline the
                 computational tasks involved in relation-based spatial
                 information processing. We also describe {\em symbolic
                 spatial indexes}, relation-based structures that
                 combine several ideas in spatial knowledge
                 representation. A symbolic spatial index is an array
                 that preserves only a set of spatial relations among
                 distinct objects in an image, called the modeling
                 space; the index array discards information, such as
                 shape and size of objects, and irrelevant spatial
                 relations. The construction of a symbolic spatial index
                 from an input image can be thought of as a
                 transformation that keeps only a set of representative
                 points needed to define the relations of the modeling
                 space. By keeping the relative arrangements of the
                 representative points in symbolic spatial indexes and
                 discarding all other points, we maintain enough
                 information to answer queries regarding the spatial
                 relations of the modeling space without the need to
                 access the initial image or an object database.
                 Symbolic spatial indexes can be used to solve problems
                 involving route planning, composition of spatial
                 relations, and update operations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "qualitative spatial information processing;
                 representation of direction and topological relations;
                 spatial data models; spatial query languages",
}
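
%%% The symbolic spatial index described above keeps only the relative
%%% arrangement of representative points.  A minimal sketch of that
%%% idea (illustrative only, not the authors' structure): map each
%%% object's representative point to its rank along each axis, so
%%% direction relations survive while shape, size, and absolute
%%% distance are discarded.
%%%
%%%     def symbolic_index(objects):
%%%         # objects: {name: (x, y)} representative points
%%%         xs = sorted({p[0] for p in objects.values()})
%%%         ys = sorted({p[1] for p in objects.values()})
%%%         return {name: (xs.index(x), ys.index(y))
%%%                 for name, (x, y) in objects.items()}
%%%
%%%     # symbolic_index({'lake': (12.0, 3.5), 'road': (40.2, 9.9)})
%%%     # -> {'lake': (0, 0), 'road': (1, 1)}; with y growing
%%%     # northward, 'road' stays north-east of 'lake' however the
%%%     # source image is scaled or stretched.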

@Article{Lin:1994:TTI,
  author =       "King Ip Lin and H. V. Jagadish and Christos
                 Faloutsos",
  title =        "The {TV}-Tree: An Index Structure for High-Dimensional
                 Data",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "4",
  pages =        "517--542",
  month =        oct,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:31 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb3.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Faloutsos:Christos.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jagadish:H=_V=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lin:King=Ip.html",
  abstract =     "We propose a file structure to index
                 high-dimensionality data, which are typically points in
                 some feature space. The idea is to use only a few of
                 the features, using additional features only when the
                 additional discriminatory power is absolutely
                 necessary. We present in detail the design of our tree
                 structure and the associated algorithms that handle
                 such `varying length' feature vectors. Finally, we
                 report simulation results, comparing the proposed
structure with the $R^*$-tree, which is one of the most
                 successful methods for low-dimensionality spaces. The
                 results illustrate the superiority of our method, which
                 saves up to 80\% in disk accesses.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "query by content; similarity retrieval; spatial
                 index",
}
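
%%% The TV-tree's telescoping idea is to keep only as many leading
%%% features as discrimination requires.  A sketch of that prefix
%%% selection (a hypothetical helper, not the paper's code, which
%%% embeds the idea in an R*-tree-like node structure):
%%%
%%%     def active_prefix_length(vectors):
%%%         # smallest number of leading dimensions that keeps the
%%%         # feature vectors pairwise distinct
%%%         dims = max(len(v) for v in vectors)
%%%         for k in range(1, dims + 1):
%%%             if len({tuple(v[:k]) for v in vectors}) == len(vectors):
%%%                 return k        # k active dimensions suffice
%%%         return dims             # identical vectors: use them all
%%%
%%%     # Vectors that already differ in their first two coordinates
%%%     # are indexed with only two active dimensions, which is what
%%%     # saves space and, in the tree, disk accesses.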

@Article{Anonymous:1994:SIS,
  author =       "Anonymous",
  title =        "Special issue on spatial database systems",
  journal =      j-VLDB-J,
  volume =       "3",
  number =       "4",
  pages =        "??--??",
  month =        oct,
  year =         "1994",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:31 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Constantopoulos:1995:SIB,
  author =       "Panos Constantopoulos and Matthias Jarke and John
                 Mylopoulos and Yannis Vassiliou",
  title =        "The Software Information Base: a Server for Reuse",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "1",
  pages =        "1--43",
  month =        jan,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:32 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Constantopoulos:Panos.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jarke:Matthias.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mylopoulos:John.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/v/Vassiliou:Yannis.html",
  abstract =     "We present an experimental software repository system
                 that provides organization, storage, management, and
                 access facilities for reusable software components. The
                 system, intended as part of an applications development
                 environment, supports the representation of information
                 about requirements, designs and implementations of
                 software, and offers facilities for visual presentation
                 of the software objects. This article details the
                 features and architecture of the repository system, the
                 technical challenges and the choices made for the
                 system development along with a usage scenario that
                 illustrates its functionality. The system has been
                 developed and evaluated within the context of the
                 ITHACA project, a technology integration/software
                 engineering project sponsored by the European
                 Communities through the ESPRIT program, aimed at
                 developing an integrated reuse-centered application
                 development and support environment based on
                 object-oriented techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "conceptual modeling; information storage and
                 retrieval; object-oriented databases; reuse; software
                 engineering",
}

@Article{Clifton:1995:HDQ,
  author =       "Chris Clifton and Hector Garcia-Molina and David
                 Bloom",
  title =        "{HyperFile}: a Data and Query Model for Documents",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "1",
  pages =        "45--86",
  month =        jan,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:32 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Bloom:David.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Clifton:Chris.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garcia=Molina:Hector.html",
  abstract =     "Non-quantitative information such as documents and
pictures poses interesting new problems in the database
                 world. Traditional data models and query languages do
                 not provide appropriate support for this information.
                 Such data are typically stored in file systems, which
                 do not provide the security, integrity, or query
                 features of database management systems. The hypertext
                 model has emerged as a good interface to this
                 information; however, {\em finding\/} information using
                 hypertext browsing does not scale well. We developed a
                 query interface that serves as an extension of the
                 browsing model of hypertext systems. These queries
                 minimize the repeated user interactions required to
                 locate data in a standard hypertext system. HyperFile
                 is a prototype data server interface. In this article,
                 we describe HyperFile, including a number of issues
                 such as query generation, query processing, and
                 indexing.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "hypertext; indexing; user interface",
}

@Article{Agrawal:1995:OSL,
  author =       "Divyakant Agrawal and Amr {El Abbadi} and Richard
                 Jeffers and Lijing Lin",
  title =        "Ordered Shared Locks for Real-Time Databases",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "1",
  pages =        "87--126",
  month =        jan,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:32 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Abbadi:Amr_El.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Agrawal:Divyakant.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jeffers:Richard.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lin:Lijing.html",
  abstract =     "We propose locking protocols for real-time databases.
                 Our approach has two main motivations: First, locking
                 protocols are widely accepted and used in most database
                 systems. Second, in real-time databases it has been
                 shown that the blocking behavior of transactions in
                 locking protocols results in performance degradation.
                 We use a new relationship between locks called ordered
                 sharing to eliminate blocking that arises in the
                 traditional locking protocols. Ordered sharing
                 eliminates blocking of read and write operations but
                 may result in delayed termination. Since timeliness and
                 not response time is the crucial factor in real-time
                 databases, our protocols exploit this delay to allow
                 transactions to execute within the slacks of delayed
                 transactions. We compare the performance of the
                 proposed protocols with the two-phase locking protocol
                 for real-time databases. Our experiments indicate that
                 the proposed protocols significantly reduce the
                 percentage of missed deadlines in the system for a
                 variety of workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrency control; time-critical scheduling;
                 transaction management",
}
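
%%% Ordered sharing, as sketched in the abstract, grants a conflicting
%%% lock instead of blocking, at the price of ordering the later
%%% transaction's termination after the earlier holder's.  A toy
%%% compatibility check (names illustrative, not the article's API):
%%%
%%%     SHARED, ORDERED, BLOCK = "shared", "ordered-shared", "block"
%%%
%%%     def grant(held, requested, ordered_sharing=True):
%%%         # held / requested are 'r' or 'w'
%%%         if held == "r" and requested == "r":
%%%             return SHARED          # no conflict in any protocol
%%%         return ORDERED if ordered_sharing else BLOCK
%%%
%%%     # grant('w', 'r') -> 'ordered-shared': the read proceeds at
%%%     # once; its transaction simply may not commit before the
%%%     # earlier writer, trading blocking for delayed termination.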

@Article{Dan:1995:CDA,
  author =       "Asit Dan and Philip S. Yu and Jen Yao Chung",
  title =        "Characterization of Database Access Pattern for
                 Analytic Prediction of Buffer Hit Probability",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "1",
  pages =        "127--154",
  month =        jan,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:32 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chung:Jen=Yao.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dan:Asit.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Philip_S=.html",
  abstract =     "The analytic prediction of buffer hit probability,
                 based on the characterization of database accesses from
                 real reference traces, is extremely useful for workload
                 management and system capacity planning. The knowledge
                 can be helpful for proper allocation of buffer space to
                 various database relations, as well as for the
                 management of buffer space for a mixed transaction and
                 query environment. Access characterization can also be
                 used to predict the buffer invalidation effect in a
                 multi-node environment which, in turn, can influence
                 transaction routing strategies. However, it is a
                 challenge to characterize the database access pattern
                 of a real workload reference trace in a simple manner
                 that can easily be used to compute buffer hit
                 probability. In this article, we use a characterization
                 method that distinguishes three types of access
                 patterns from a trace: (1) locality within a
                 transaction, (2) random accesses by transactions, and
                 (3) sequential accesses by long queries. We then
                 propose a concise way to characterize the access skew
                 across randomly accessed pages by logically grouping
                 the large number of data pages into a small number of
                 partitions such that the frequency of accessing each
                 page within a partition can be treated as equal. Based
                 on this approach, we present a recursive binary
                 partitioning algorithm that can infer the access skew
                 characterization from the buffer hit probabilities for
                 a subset of the buffer sizes. We validate the buffer
                 hit predictions for single and multiple node systems
                 using production database traces. We further show that
                 the proposed approach can predict the buffer hit
                 probability of a composite workload from those of its
                 component files.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access skew; analytic prediction; database access
                 characterization; reference trace; sequential access;
                 workload management",
}
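
%%% The article's recursive binary partitioning works backwards from
%%% measured hit ratios to a small set of page partitions with equal
%%% within-partition access frequency.  The forward direction,
%%% predicting the hit probability once partitions are known, can be
%%% sketched as follows (an approximation assuming the buffer retains
%%% the hottest pages; not the paper's algorithm):
%%%
%%%     def hit_probability(partitions, buffer_pages):
%%%         # partitions: list of (num_pages, access_fraction)
%%%         parts = sorted(partitions,
%%%                        key=lambda p: p[1] / p[0], reverse=True)
%%%         hits, left = 0.0, buffer_pages
%%%         for pages, freq in parts:
%%%             kept = min(pages, left)
%%%             hits += freq * kept / pages   # equal frequency inside
%%%             left -= kept
%%%         return hits
%%%
%%%     # hit_probability([(100, 0.8), (10000, 0.2)], 100) -> 0.8:
%%%     # a buffer holding the hot partition catches 80% of accesses.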

@Article{Peckham:1995:DME,
  author =       "Joan Peckham and Bonnie MacKellar and Michael
                 Doherty",
  title =        "Data Model for Extensible Support of Explicit
                 Relationships in Design Databases",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "2",
  pages =        "157--191",
  month =        apr,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:33 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Doherty:Michael.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/MacKellar:Bonnie.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Peckham:Joan.html",
  abstract =     "We describe the conceptual model of SORAC, a data
                 modeling system developed at the University of Rhode
                 Island. SORAC supports both semantic objects and
                 relationships, and provides a tool for modeling
                 databases needed for complex design domains. SORAC's
                 set of built-in semantic relationships permits the
                 schema designer to specify enforcement rules that
                 maintain constraints on the object and relationship
                 types. SORAC then automatically generates C++ code to
                 maintain the specified enforcement rules, producing a
                 schema that is compatible with Ontos. This facilitates
                 the task of the schema designer, who no longer has to
                 ensure that all methods on object classes correctly
                 maintain necessary constraints. In addition, explicit
                 specification of enforcement rules permits automated
                 analysis of enforcement propagations. We compare the
                 interpretations of relationships within the semantic
                 and object-oriented models as an introduction to the
                 mixed model that SORAC supports. Next, the set of
                 built-in SORAC relationship types is presented in terms
                 of the enforcement rules permitted on each relationship
                 type. We then use the modeling requirements of an
                 architectural design support system, called
                 ArchObjects, to demonstrate the capabilities of SORAC.
                 The implementation of the current SORAC prototype is
                 also briefly discussed.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "computer-aided architectural design; database
                 constraints; relationship semantics; semantic and
                 object-oriented data modeling",
  xxpages =      "157--192",
}

@Article{Teniente:1995:UKB,
  author =       "Ernest Teniente and Antoni Oliv{\'e}",
  title =        "Updating Knowledge Bases While Maintaining Their
                 Consistency",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "2",
  pages =        "193--241",
  month =        apr,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:33 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Oliv=eacute=:Antoni.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Teniente:Ernest.html",
  abstract =     "When updating a knowledge base, several problems may
                 arise. One of the most important problems is that of
                 integrity constraints satisfaction. The classic
                 approach to this problem has been to develop methods
                 for {\em checking\/} whether a given update violates an
                 integrity constraint. An alternative approach consists
                 of trying to repair integrity constraints violations by
                 performing additional updates that {\em maintain\/}
                 knowledge base consistency. Another major problem in
                 knowledge base updating is that of {\em view updating},
                 which determines how an update request should be
                 translated into an update of the underlying base facts.
                 We propose a new method for updating knowledge bases
                 while maintaining their consistency. Our method can be
                 used for both integrity constraints maintenance and
                 view updating. It can also be combined with any
                 integrity checking method for view updating and
                 integrity checking. The kind of updates handled by our
                 method are: updates of base facts, view updates,
                 updates of deductive rules, and updates of integrity
                 constraints. Our method is based on events and
                 transition rules, which explicitly define the
                 insertions and deletions induced by a knowledge base
                 update. Using these rules, an extension of the SLDNF
                 procedure allows us to obtain all possible minimal ways
                 of updating a knowledge base without violating any
                 integrity constraint.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "integrity checking; integrity maintenance; view
                 updating",
}

@Article{Guting:1995:RBS,
  author =       "Ralf Hartmut G{\"u}ting and Markus Schneider",
  title =        "Realm-Based Spatial Data Types: The {ROSE} Algebra",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "2",
  pages =        "243--286",
  month =        apr,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:33 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/G=uuml=ting:Ralf_Hartmut.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Schneider:Markus.html",
  abstract =     "Spatial data types or algebras for database systems
                 should (1) be fully general, that is, closed under set
                 operations, (2) have formally defined semantics, (3) be
                 defined in terms of finite representations available in
                 computers, (4) offer facilities to enforce geometric
                 consistency of related spatial objects, and (5) be
                 independent of a particular DBMS data model, but
                 cooperate with any. We present an algebra that uses
                 {\em realms\/} as geometric domains underlying spatial
                 data types. A realm, as a general database concept, is
                 a finite, dynamic, user-defined structure underlying
                 one or more system data types. Problems of numerical
                 robustness and topological correctness are solved
                 within and below the realm layer so that spatial
                 algebras defined above a realm have very nice algebraic
properties. Realms also interact with a DBMS to enforce
                 geometric consistency on object creation or update. The
                 ROSE algebra is defined on top of realms and offers
                 general types to represent point, line, and region
                 features, together with a comprehensive set of
                 operations. It is described within a polymorphic type
system and interacts with a DBMS data model and query
                 language through an abstract {\em object model
                 interface.} An example integration of ROSE into the
                 object-oriented data model $O^2$ and its query language
                 is presented.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "finite resolution; geometric consistency; numerical
                 robustness; object model interface; realm; topological
                 correctness",
}
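
%%% The realm idea above can be miniaturized: every spatial value is
%%% built from points registered in a finite, user-defined realm, so
%%% geometric consistency and finite resolution hold by construction.
%%% A toy sketch (illustrative, far simpler than the ROSE algebra):
%%%
%%%     class Realm:
%%%         def __init__(self):
%%%             self.points = set()
%%%         def register(self, point):
%%%             self.points.add(point)   # e.g. integer grid coords
%%%             return point
%%%
%%%     def region(realm, vertices):
%%%         # a ROSE-style value may only reference realm objects
%%%         if not all(v in realm.points for v in vertices):
%%%             raise ValueError("spatial values must be realm-based")
%%%         return tuple(vertices)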

@Article{Templeton:1995:IDC,
  author =       "Marjorie Templeton and Herbert Henley and Edward Maros
                 and Darrel J. {Van Buer}",
  title =        "{InterViso}: Dealing With the Complexity of Federated
                 Database Access",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "2",
  pages =        "287--317",
  month =        apr,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:33 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Buer:Darrel_J=_Van.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Henley:Herbert.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Maros:Edward.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Templeton:Marjorie.html",
  abstract =     "Connectivity products are finally available to provide
                 the `highways' between computers containing data. IBM
                 has provided strong validation of the concept with
                 their `Information Warehouse.' DBMS vendors are
                 providing gateways into their products, and SQL is
                 being retrofitted on many older DBMSs to make it easier
                 to access data from standard 4GL products and
                 application development systems. The next step needed
                 for data integration is to provide (1) a common data
                 dictionary with a conceptual schema across the data to
                 mask the many differences that occur when databases are
                 developed independently and (2) a server that can
                 access and integrate the databases using information
                 from the data dictionary. In this article, we discuss
                 InterViso, one of the first commercial federated
                 database products. InterViso is based on Mermaid, which
                 was developed at SDC and Unisys (Templeton et al.,
                 1987b). It provides a value added layer above
                 connectivity products to handle views across databases,
                 schema translation, and transaction management.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data warehouse; database integration; federated
                 database",
  xxpages =      "287--318",
}

@Article{Atkinson:1995:SIP,
  author =       "Malcolm P. Atkinson and Ronald Morrison",
  title =        "Special Issue on Persistent Object Systems:
                 Orthogonally Persistent Object Systems",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "3",
  pages =        "319--401",
  month =        jul,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 27 08:46:01 MDT 2000",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Atkinson:Malcolm_P=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Morrison:Ronald.html",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Atkinson:1995:OPO,
  author =       "Malcolm Atkinson and Ronald Morrison",
  title =        "Orthogonally persistent object systems",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "3",
  pages =        "319--402",
  month =        jul,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:34 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Persistent Application Systems (PASs) are of
                 increasing social and economic importance. They have
the potential to be long-lived and concurrently accessed,
                 and to consist of large bodies of data and programs.
                 Typical examples of PASs are CAD/CAM systems, office
                 automation, CASE tools, software engineering
                 environments, and patient-care support systems in
                 hospitals. Orthogonally persistent object systems are
                 intended to provide improved support for the design,
                 construction, maintenance, and operation of PASs.
                 Persistence abstraction allows the creation and
                 manipulation of data in a manner that is independent of
                 its lifetime, thereby integrating the database view of
                 information with the programming language view. This
                 yields a number of advantages in terms of orthogonal
                 design and programmer productivity which are beneficial
                 for PASs. Design principles have been proposed for
                 persistent systems. By following these principles,
                 languages that provide persistence as a basic
                 abstraction have been developed. In this paper, the
                 motivation for orthogonal persistence is reviewed along
                 with the above mentioned design principles. The
                 concepts for integrating programming languages and
                 databases through the persistence abstraction, and
                 their benefits, are given. The technology to support
                 persistence, the achievements, and future directions of
                 persistence research are then discussed.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database programming languages; orthogonal
                 persistence; persistent application systems; persistent
                 programming languages",
}

@Article{Albano:1995:FPL,
  author =       "Antonio Albano and Giorgio Ghelli and Renzo Orsini",
  title =        "{Fibonacci}: a Programming Language for Object
                 Databases",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "3",
  pages =        "403--444",
  month =        jul,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:34 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Albano:Antonio.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Ghelli:Giorgio.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Orsini:Renzo.html",
  abstract =     "Fibonacci is an object-oriented database programming
                 language characterized by static and strong typing, and
                 by new mechanisms for modeling databases in terms of
                 objects with roles, classes, and associations. A brief
                 introduction to the language is provided to present
                 those features, which are particularly suited to
                 modeling complex databases. Examples of the use of
                 Fibonacci are given with reference to the prototype
                 implementation of the language.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data models; database programming languages; objects
                 with roles",
}

@Article{Ozsu:1995:TUB,
  author =       "M. Tamer {\"O}zsu and Randal J. Peters and Duane
                 Szafron and Boman Irani and Anna Lipka and Adriana
                 Mu{\~n}oz",
  title =        "{TIGUKAT}: a Uniform Behavioral Objectbase
                 Management System",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "3",
  pages =        "445--492",
  month =        jul,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:34 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zsu:M=_Tamer.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Irani:Boman.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lipka:Anna.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mu=ntilde=oz:Adriana.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Peters:Randal_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Szafron:Duane.html",
  abstract =     "We describe the TIGUKAT objectbase management system,
                 which is under development at the Laboratory for
                 Database Systems Research at the University of Alberta.
                 TIGUKAT has a novel object model, whose identifying
                 characteristics include a purely behavioral semantics
                 and a uniform approach to objects. Everything in the
                 system, including types, classes, collections,
                 behaviors, and functions, as well as meta-information,
                 is a first-class object with well-defined behavior. In
                 this way, the model abstracts everything, including
                 traditional structural notions such as instance
                 variables, method implementation, and schema
                 definition, into a uniform semantics of behaviors on
                 objects. Our emphasis in this article is on the object
                 model, its implementation, the persistence model, and
                 the query language. We also (briefly) present other
                 database management functions that are under
                 development such as the query optimizer, the version
                 control system, and the transaction manager.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database management; objectbase management; persistent
                 storage system; reflective system",
}

@Article{Benzaken:1995:TDP,
  author =       "V{\'e}ronique Benzaken and Anne Doucet",
  title =        "{Th{\'e}mis}: a Database Programming Language
                 Handling Integrity Constraints",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "3",
  pages =        "493--517",
  month =        jul,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:34 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Benzaken:V=eacute=ronique.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Doucet:Anne.html",
  abstract =     "This article presents a database programming language,
                 Th{\'e}mis, which supports subtyping and class
                 hierarchies, and allows for the definition of integrity
                 constraints in a global and declarative way. We first
                 describe the salient features of the language: types,
                 names, classes, integrity constraints (including
                 methods), and transactions. The inclusion of methods
                 into integrity constraints allows an increase of the
                 declarative power of these constraints. Indeed, the
                 information needed to define a constraint is not always
                 stored in the database through attributes, but is
                 sometimes computed or derived data. Then, we address
                 the problem of efficiently checking constraints. More
                 specifically, we consider two different problems: (1)
                 statically reducing the number of constraints to be
                 checked, and (2) generating an efficient run-time
                 checker. Using simple strategies, one can significantly
                 improve the efficiency of the verification. We show how
                 to reduce the number of constraints to be checked by
                 characterizing the portions of the database that are
                 involved in both the constraints and in a transaction.
                 We also show how to generate efficient algorithms for
                 checking a large class of constraints. We show how all
                 the techniques presented exploit the underlying type
                 system, which provides significant help in solving (1)
                 and (2). Last, the current status of the Th{\'e}mis
                 prototype is presented.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database programming language; integrity constraints;
                 program analysis",
}
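
%%% The static reduction described above, checking only those
%%% constraints whose data a transaction can actually touch, is easy
%%% to picture (the attribute names here are hypothetical):
%%%
%%%     def constraints_to_check(touched, constraints):
%%%         # touched: classes/attributes written by the transaction;
%%%         # each constraint carries the set of data it reads
%%%         return [c for c in constraints if c.reads & touched]
%%%
%%% Every constraint filtered out here is skipped at run time, which
%%% is where such strategies recover most of the verification cost.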

@Article{Kemper:1995:APS,
  author =       "Alfons Kemper and Donald Kossmann",
  title =        "Adaptable Pointer Swizzling Strategies in Object
                 Bases: Design, Realization, and Quantitative Analysis",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "3",
  pages =        "519--566",
  month =        jul,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:34 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemper:Alfons.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kossmann:Donald.html",
  abstract =     "In this article, different techniques for {\em
                 `pointer swizzling'\/} are classified and evaluated for
                 optimizing the access to main-memory resident
                 persistent objects. To speed up the access along
                 inter-object references, the persistent pointers in the
                 form of unique object identifiers (OIDs) are
                 transformed (swizzled) into main-memory pointers
                 (addresses). Pointer swizzling techniques can be
                 divided into two classes: (1) those that allow
                 replacement of swizzled objects from the buffer before
                 the end of an application program, and (2) those that
                 rule out the displacement of swizzled objects. The
                 first class (i.e., techniques that take `precautions'
                 for the replacement of swizzled objects) has not yet
                 been thoroughly investigated. Four different pointer
                 swizzling techniques allowing object replacement are
                 investigated and compared with the performance of an
                 object manager employing no pointer swizzling. The
                 extensive qualitative and quantitative
                 evaluation---only part of which could be presented in
                 this article---demonstrates that there is no {\em one\/}
                 superior pointer swizzling strategy for {\em all\/}
                 application profiles. Therefore, an adaptable object
                 base run-time system is devised that employs the full
                 range of pointer swizzling strategies, depending on the
                 application profile characteristics that are determined
                 by, for example, monitoring in combination with
                 sampling, user specifications, and/or program
                 analysis.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "object-oriented database systems; performance
                 evaluation; pointer swizzling",
  xxpages =      "519--567",
}
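
%%% The basic swizzle-on-first-traversal step reads roughly as below;
%%% the article's class (1) techniques additionally take precautions
%%% so that a swizzled target may still be evicted (an un-swizzle step
%%% this sketch omits).  "buffer.fetch" is a hypothetical
%%% buffer-manager call, not the authors' interface:
%%%
%%%     class Ref:
%%%         def __init__(self, oid):
%%%             self.oid, self.target = oid, None   # unswizzled
%%%         def deref(self, buffer):
%%%             if self.target is None:             # first traversal
%%%                 self.target = buffer.fetch(self.oid)  # swizzle
%%%             return self.target    # afterwards: a plain pointer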

@Article{Anonymous:1995:SIP,
  author =       "Anonymous",
  title =        "Special issue on persistent object systems",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "3",
  pages =        "??--??",
  month =        jul,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:34 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Barbara:1995:SSO,
  author =       "Daniel Barbar{\'a} and Tomasz Imielinski",
  title =        "Special System-oriented Section: The Best of {SIGMOD}
                 1994: Sleepers and Workaholics: Caching Strategies in
                 Mobile Environments",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "4",
  pages =        "567--602",
  month =        oct,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 27 08:46:01 MDT 2000",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Barbar=aacute=:Daniel.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Imielinski:Tomasz.html",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Barbara:1995:SWC,
  author =       "Daniel Barbar{\'a} and Tomasz Imieli{\'n}ski",
  title =        "Sleepers and workaholics: caching strategies in mobile
                 environments (extended version)",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "4",
  pages =        "567--602",
  month =        oct,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:35 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In the mobile wireless computing environment of the
                 future, a large number of users, equipped with
                 low-powered palmtop machines, will query databases over
                 wireless communication channels. Palmtop-based units
                 will often be disconnected for prolonged periods of
                 time, due to battery power saving measures; palmtops
                 also will frequently relocate between different cells,
                 and will connect to different data servers at different
                 times. Caching of frequently accessed data items will
                 be an important technique that will reduce contention
                 on the narrow-bandwidth, wireless channel. However,
                 cache invalidation strategies will be severely
                 affected by the disconnection and mobility of the
                 clients. The server may no longer know which clients
                 are currently residing under its cell, and which of
                 them are currently on. We propose a taxonomy of
                 different cache invalidation strategies, and study the
                 impact of clients' disconnection times on their
                 performance. We study ways to improve further the
                 efficiency of the invalidation techniques described. We
                 also describe how our techniques can be implemented
                 over different network environments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "caching; data management; information services;
                 wireless",
}
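
%%% One family in the article's taxonomy broadcasts periodic
%%% invalidation reports; a client that slept through more than the
%%% report window cannot tell what it missed and must drop its cache.
%%% A minimal sketch of the client side (names illustrative):
%%%
%%%     def apply_report(cache, updated_ids, last_heard, now, window):
%%%         if now - last_heard > window:
%%%             cache.clear()            # slept too long: start over
%%%         else:
%%%             for item_id in updated_ids:
%%%                 cache.pop(item_id, None)   # invalidate entries
%%%         return now                   # new last_heard timestamp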

@Article{Nyberg:1995:ACS,
  author =       "Chris Nyberg and Tom Barclay and Zarka Cvetanovic and
                 Jim Gray and David B. Lomet",
  title =        "{AlphaSort}: a Cache-Sensitive Parallel External
                 Sort",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "4",
  pages =        "603--627",
  month =        oct,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:35 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Barclay:Tom.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cvetanovic:Zarka.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gray:Jim.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lomet:David_B=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Nyberg:Chris.html",
  abstract =     "A new sort algorithm, called AlphaSort, demonstrates
                 that commodity processors and disks can handle
                 commercial batch workloads. Using commodity processors,
                 memory, and arrays of SCSI disks, AlphaSort runs the
                 industry-standard sort benchmark in seven seconds. This
                 beats the best published record on a 32-CPU 32-disk
                 Hypercube by 8:1. On another benchmark, AlphaSort
                 sorted more than a gigabyte in one minute. AlphaSort is
                 a cache-sensitive, memory-intensive sort algorithm. We
                 argue that modern architectures require algorithm
                 designers to re-examine their use of the memory
                 hierarchy. AlphaSort uses clustered data structures to
                 get good cache locality, file striping to get high disk
                 bandwidth, QuickSort to generate runs, and
                 replacement-selection to merge the runs. It uses shared
                 memory multiprocessors to break the sort into subsort
                 chores. Because startup times are becoming a
                 significant part of the total time, we propose two new
                 benchmarks: (1) MinuteSort: how much can you sort in
                 one minute, and (2) PennySort: how much can you sort
                 for one penny.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Alpha; cache; DEC 7000; disk; memory; parallel; sort;
                 striping",
  xxpages =      "603--628",
}
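
%%% AlphaSort's phase structure (QuickSort for cache-local run
%%% generation, replacement-selection to merge the runs) can be
%%% outlined with a heap-based merge standing in for replacement
%%% selection:
%%%
%%%     import heapq
%%%
%%%     def external_sort(chunks):
%%%         # chunks: memory-sized batches of records
%%%         runs = [sorted(chunk) for chunk in chunks]  # run phase
%%%         return list(heapq.merge(*runs))             # merge phase
%%%
%%%     # external_sort([[3, 1], [4, 1, 5], [9, 2, 6]])
%%%     # -> [1, 1, 2, 3, 4, 5, 6, 9]
%%%
%%% The paper's contribution lies in the cache- and I/O-conscious
%%% engineering of both phases, not in this skeleton.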

@Article{White:1995:QHP,
  author =       "Seth J. White and David J. DeWitt",
  title =        "{QuickStore}: a High Performance Mapped Object
                 Store",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "4",
  pages =        "629--673",
  month =        oct,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:35 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/DeWitt:David_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/White:Seth_J=.html",
  abstract =     "QuickStore is a memory-mapped storage system for
                 persistent C++, built on top of the EXODUS Storage
                 Manager. QuickStore provides fast access to in-memory
                 objects by allowing application programs to access
                 objects via normal virtual memory pointers. This
                 article presents the results of a detailed performance
                 study using the OO7 benchmark. The study compares the
                 performance of QuickStore with the latest
                 implementation of the E programming language. The
                 QuickStore and E systems exemplify the two basic
                 approaches (hardware and software) that have been used
                 to implement persistence in object-oriented database
                 systems. In addition, both systems use the same
                 underlying storage manager and compiler, allowing us to
                 make a truly apples-to-apples comparison of the
                 hardware and software techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "benchmark; client-server; memory-mapped;
                 object-oriented; performance; pointer swizzling",
}

@Article{Swami:1995:EPF,
  author =       "Arun N. Swami and K. Bernhard Schiefer",
  title =        "Estimating Page Fetches for Index Scans with Finite
                 {LRU} Buffers",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "4",
  pages =        "675--701",
  month =        oct,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:35 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Schiefer:K=_Bernhard.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Swami:Arun_N=.html",
  abstract =     "We describe an algorithm for estimating the number of
                 page fetches for a partial or complete scan of a B-tree
                 index. The algorithm obtains estimates for the number
                 of page fetches for an index scan when given the number
                 of tuples selected and the number of LRU buffers
                 currently available. The algorithm has an initial phase
                 that is performed exactly once before any estimates are
                 calculated. This initial phase, involving LRU buffer
                 modeling, requires a scan of all the index entries and
                 calculates the number of page fetches for different
                 buffer sizes. An approximate empirical model is
                 obtained from this data. Subsequently, an inexpensive
                 estimation procedure is called by the query optimizer
                 whenever it needs an estimate of the page fetches for
                 the index scan. This procedure utilizes the empirical
                 model obtained in the initial phase.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "estimation; index scan; LRU; query optimization",
}
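
%%% The initial phase's measurement, counting fetches for one scan
%%% under various LRU buffer sizes, can be reproduced directly; the
%%% article then fits an approximate empirical model to such points
%%% rather than re-simulating at optimization time:
%%%
%%%     from collections import OrderedDict
%%%
%%%     def lru_fetches(page_trace, buffer_size):
%%%         buf, fetches = OrderedDict(), 0
%%%         for page in page_trace:
%%%             if page in buf:
%%%                 buf.move_to_end(page)        # hit: refresh
%%%             else:
%%%                 fetches += 1                 # miss: fetch page
%%%                 if len(buf) >= buffer_size:
%%%                     buf.popitem(last=False)  # evict LRU page
%%%                 buf[page] = True
%%%         return fetches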

@Article{Landau:1995:HQA,
  author =       "Gad M. Landau and Jeanette P. Schmidt and Vassilis J.
                 Tsotras",
  title =        "Historical queries along multiple lines of time
                 evolution",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "4",
  pages =        "703--726",
  month =        oct,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:35 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Traditional approaches to addressing historical
                 queries assume a {\em single\/} line of time evolution;
                 that is, a system (database, relation) evolves over
                 time through a sequence of transactions. Each
                 transaction always applies to the unique, current state
                 of the system, resulting in a new current state. There
                 are, however, complex applications where the system's
                 state evolves into {\em multiple\/} lines of evolution.
                 In general, this creates a tree (hierarchy) of
                 evolution lines, where each tree node represents the
                 time evolution of a particular subsystem. Multiple
                 lines create novel historical queries, such as {\em
                 vertical\/} or {\em horizontal\/} historical queries.
                 The key characteristic of these problems is that
                 portions of the history are shared; answering
                 historical queries should not necessitate duplication
                 of shared histories as this could increase the storage
                 requirements dramatically. Both the vertical and
                 horizontal historical queries have two parts: a
                 `search' part, where the time of interest is located
                 together with the appropriate subsystem, and a
                 reconstruction part, where the subsystem's state is
                 reconstructed for that time. This article focuses on
                 the search part; several reconstruction methods,
                 designed for single evolution lines, can be applied once
                 the appropriate time of interest is located. For both
                 the vertical and the horizontal historical queries, we
                 present algorithms that work without duplicating shared
                 histories. Combinations of the vertical and horizontal
                 queries are possible, and enable searching in both
                 dimensions of the tree of evolutions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access methods; CAD databases; data-structures;
                 rollback databases",
}
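
%%% The key point of the Landau--Schmidt--Tsotras abstract is that
%%% shared histories in a tree of evolution lines must not be
%%% duplicated.  A toy Python sketch of that idea (a hypothetical
%%% structure, not the paper's data structures or search algorithms):
%%%
%%%     class EvolutionLine:
%%%         """Node in a tree of evolution lines; each node stores
%%%         only its own updates, so ancestor history is shared."""
%%%         def __init__(self, parent=None, branch_time=0):
%%%             self.parent = parent
%%%             self.branch_time = branch_time
%%%             self.log = []          # (time, key, value) updates
%%%
%%%         def record(self, t, key, value):
%%%             self.log.append((t, key, value))
%%%
%%%         def state_at(self, t):
%%%             # Replay the shared ancestor history, then this
%%%             # line's own log, up to time t.
%%%             state = ({} if self.parent is None else
%%%                      self.parent.state_at(min(t, self.branch_time)))
%%%             for ts, k, v in self.log:
%%%                 if ts <= t:
%%%                     state[k] = v
%%%             return state
%%%
%%%     main = EvolutionLine()
%%%     main.record(1, "x", 10)
%%%     branch = EvolutionLine(parent=main, branch_time=2)
%%%     branch.record(3, "x", 99)
%%%     assert main.state_at(3) == {"x": 10}
%%%     assert branch.state_at(3) == {"x": 99}  # shares main's past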

@Article{Abiteboul:1995:PLM,
  author =       "Serge Abiteboul and Catriel Beeri",
  title =        "The Power of Languages for the Manipulation of Complex
                 Values",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "4",
  pages =        "727--794",
  month =        oct,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:35 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb4.html;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Abiteboul:Serge.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Beeri:Catriel.html",
  abstract =     "Various models and languages for describing and
                 manipulating hierarchically structured data have been
                 proposed. Algebraic, calculus-based, and
                 logic-programming oriented languages have all been
                 considered. This article presents a general model for
                 complex values (i.e., values with hierarchical
                 structures), and languages for it based on the three
                 paradigms. The algebraic language generalizes those
                 presented in the literature; it is shown to be related
                 to the functional style of programming advocated by
                 Backus (1978). The notion of domain independence (from
                 relational databases) is defined, and syntactic
                 restrictions (referred to as safety conditions) on
                 calculus queries are formulated to guarantee domain
                 independence. The main results are: The
                 domain-independent calculus, the safe calculus, the
                 algebra, and the logic-programming oriented language
                 have equivalent expressive power. In particular,
                 recursive queries, such as the transitive closure, can
                 be expressed in each of the languages. For this result,
                 the algebra needs the powerset operation. A more
                 restricted version of safety is presented, such that
                 the restricted safe calculus is equivalent to the
                 algebra without the powerset. The results are extended
                 to the case where arbitrary functions and predicates
                 are used in the languages.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "complex object; complex value; database; database
                 model; query language",
}
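
%%% As a concrete point of reference for the Abiteboul--Beeri result
%%% that recursive queries such as transitive closure are expressible
%%% in these languages: a naive fixpoint computation of transitive
%%% closure in Python (illustration only, unrelated to the paper's
%%% algebra or its powerset operation):
%%%
%%%     def transitive_closure(edges):
%%%         """Repeatedly join the relation with itself until no
%%%         new pairs appear."""
%%%         closure = set(edges)
%%%         while True:
%%%             new_pairs = {(a, d)
%%%                          for (a, b) in closure
%%%                          for (c, d) in closure if b == c}
%%%             if new_pairs <= closure:
%%%                 return closure
%%%             closure |= new_pairs
%%%
%%%     assert (1, 3) in transitive_closure({(1, 2), (2, 3)})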

@Article{Anonymous:1995:SSO,
  author =       "Anonymous",
  title =        "Special system-oriented section: the best of {SIGMOD}
                 '94",
  journal =      j-VLDB-J,
  volume =       "4",
  number =       "4",
  pages =        "??--??",
  month =        oct,
  year =         "1995",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:35 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{DeWitt:1996:POT,
  author =       "David J. {DeWitt} and Jeffrey F. Naughton and John C.
                 Shafer and Shivakumar Venkataraman",
  title =        "Parallelizing {OODBMS} traversals: a performance
                 evaluation",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "1",
  pages =        "3--18",
  month =        jan,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:36 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/DeWitt:David_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Naughton:Jeffrey_F=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shafer:John_C=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/v/Venkataraman:Shivakumar.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005001/60050003.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050003.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050003.ps.gz",
  abstract =     "In this paper we describe the design and
                 implementation of {\em ParSets}, a means of exploiting
                 parallelism in the SHORE OODBMS. We used ParSets to
                 parallelize the graph traversal portion of the OO7
                 OODBMS benchmark, and present speedup and scaleup
                 results from parallel SHORE running these traversals on
                 a cluster of commodity workstations connected by a
                 standard Ethernet. For some OO7 traversals, SHORE
                 achieved excellent speedup and scaleup; for other OO7
                 traversals, only marginal speedup and scaleup occurred.
                 The characteristics of these traversals shed light on
                 when the ParSet approach to parallelism can and cannot
                 be applied to speed up an application.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Object-oriented database management systems;
                 Parallelism; ParSets; SHORE",
}

@Article{Sivasankaran:1996:PAR,
  author =       "Rajendran M. Sivasankaran and John A. Stankovic and
                 Donald F. Towsley and Bhaskar Purimetla and Krithi
                 Ramamritham",
  title =        "Priority Assignment in Real-Time Active Databases",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "1",
  pages =        "19--34",
  month =        jan,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:36 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Purimetla:Bhaskar.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamritham:Krithi.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sivasankaran:Rajendran_M=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Stankovic:John_A=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Towsley:Donald_F=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005001/60050019.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050019.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050019.ps.gz",
  abstract =     "Active databases and real-time databases have been
                 important areas of research in the recent past. It has
                 been recognized that many benefits can be gained by
                 integrating real-time and active database technologies.
                 However, not much work has been done in the area of
                 transaction processing in real-time active databases.
                 This paper deals with an important aspect of
                 transaction processing in real-time active databases,
                 namely the problem of assigning priorities to
                 transactions. In these systems, time-constrained
                 transactions trigger other transactions during their
                 execution. We present three policies for assigning
                 priorities to parent, immediate and deferred
                 transactions executing on a multiprocessor system and
                 then evaluate the policies through simulation. The
                 policies use different amounts of semantic information
                 about transactions to assign the priorities. The
                 simulator has been validated against the results of
                 earlier published studies. We conducted experiments in
                 three settings: a task setting, a main memory database
                 setting and a disk-resident database setting. Our
                 results demonstrate that dynamically changing the
                 priorities of transactions, depending on their behavior
                 (triggering rules), yields a substantial improvement in
                 the number of triggering transactions that meet their
                 deadline in all three settings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Active databases; Coupling mode; Deadlines;
                 ECA-priority assignment; Real-time databases",
}
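
%%% The Sivasankaran et al. abstract concerns assigning priorities to
%%% parent and triggered transactions.  One plausible deadline-driven
%%% policy, sketched in Python (the paper's three policies are not
%%% spelled out in the abstract; the names and the inheritance rule
%%% here are assumptions):
%%%
%%%     import heapq
%%%
%%%     class EDFScheduler:
%%%         """Earliest-deadline-first ready queue; a triggered
%%%         transaction may inherit its parent's tighter deadline."""
%%%         def __init__(self):
%%%             self.ready = []   # min-heap of (deadline, txn_id)
%%%
%%%         def submit(self, txn_id, deadline, parent_deadline=None):
%%%             if parent_deadline is not None:
%%%                 deadline = min(deadline, parent_deadline)
%%%             heapq.heappush(self.ready, (deadline, txn_id))
%%%
%%%         def next_txn(self):
%%%             return (heapq.heappop(self.ready)[1]
%%%                     if self.ready else None)
%%%
%%%     s = EDFScheduler()
%%%     s.submit("parent", deadline=10)
%%%     s.submit("triggered", deadline=50, parent_deadline=10)
%%%     assert s.next_txn() in ("parent", "triggered")  # both at 10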

@Article{Keller:1996:PBC,
  author =       "Arthur M. Keller and Julie Basu",
  title =        "A Predicate-based Caching Scheme for Client-Server
                 Database Architectures",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "1",
  pages =        "35--47",
  month =        jan,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:36 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Basu:Julie.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Keller:Arthur_M=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005001/60050035.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050035.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050035.ps.gz",
  abstract =     "We propose a new client-side data-caching scheme for
                 relational databases with a central server and multiple
                 clients. Data are loaded into each client cache based
                 on queries executed on the central database at the
                 server. These queries are used to form predicates that
                 describe the cache contents. A subsequent query at the
                 client may be satisfied in its local cache if we can
                 determine that the query result is entirely contained
                 in the cache. This issue is called {\em cache
                 completeness}. A separate issue, {\em cache currency},
                 deals with the effect on client caches of updates
                 committed at the central database. We examine the
                 various performance tradeoffs and optimization issues
                 involved in addressing the questions of cache currency
                 and completeness using predicate descriptions and
                 suggest solutions that promote good dynamic behavior.
                 Lower query-response times, reduced message traffic,
                 higher server throughput, and better scalability are
                 some of the expected benefits of our approach over
                 commonly used relational server-side and object
                 ID-based or page-based client-side caching.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "cache completeness; cache currency; caching; multiple
                 clients; relational databases",
}
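
%%% The heart of the Keller--Basu scheme is the cache-completeness
%%% test: answer a query locally only if its result is provably
%%% contained in the cached predicates.  A Python sketch for the
%%% simplest case of one-dimensional range predicates (an assumed
%%% simplification, not the paper's predicate language):
%%%
%%%     def covered(cached_ranges, query_range):
%%%         """True iff [lo, hi] lies inside the union of the
%%%         cached intervals."""
%%%         lo, hi = query_range
%%%         point = lo
%%%         for a, b in sorted(cached_ranges):
%%%             if a > point:
%%%                 break                  # gap: cache incomplete
%%%             point = max(point, b)
%%%             if point >= hi:
%%%                 return True
%%%         return point >= hi
%%%
%%%     cache = [(0, 50), (40, 100)]
%%%     assert covered(cache, (10, 90))       # answer locally
%%%     assert not covered(cache, (90, 120))  # go to the server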

@Article{Stonebraker:1996:MWA,
  author =       "Michael Stonebraker and Paul M. Aoki and Witold Litwin
                 and Avi Pfeffer and Adam Sah and Jeff Sidell and Carl
                 Staelin and Andrew Yu",
  title =        "{Mariposa}: a Wide-Area Distributed Database
                 System",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "1",
  pages =        "48--63",
  month =        jan,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:36 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Aoki:Paul_M=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Litwin:Witold.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Pfeffer:Avi.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sah:Adam.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sidell:Jeff.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Staelin:Carl.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Stonebraker:Michael.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Andrew.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005001/60050048.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050048.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050048.ps.gz",
  abstract =     "The requirements of wide-area distributed database
                 systems differ dramatically from those of local-area
                 network systems. In a wide-area network (WAN)
                 configuration, individual sites usually report to
                 different system administrators, have different access
                 and charging algorithms, install site-specific data
                 type extensions, and have different constraints on
                 servicing remote requests. Typical of the last point
                 are production transaction environments, which are
                 fully engaged during normal business hours, and cannot
                 take on additional load. Finally, there may be many
                 sites participating in a WAN distributed DBMS. In this
                 world, a single program performing global query
                 optimization using a cost-based optimizer will not work
                 well. Cost-based optimization does not respond well to
                 site-specific type extension, access constraints,
                 charging algorithms, and time-of-day constraints.
                 Furthermore, traditional cost-based distributed
                 optimizers do not scale well to a large number of
                 possible processing sites. Since traditional
                 distributed DBMSs have all used cost-based optimizers,
                 they are not appropriate in a WAN environment, and a
                 new architecture is required. We have proposed and
                 implemented an economic paradigm as the solution to
                 these issues in a new distributed DBMS called Mariposa.
                 In this paper, we present the architecture and
                 implementation of Mariposa and discuss early feedback
                 on its operating characteristics.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "autonomy; databases; distributed systems; economic
                 site; name service; wide-area network",
}
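
%%% Mariposa's economic paradigm is commonly described as a bidding
%%% process: sites bid to execute query fragments and a broker picks
%%% among the bids.  A toy Python sketch under that reading (the site
%%% names, bid format, and selection rule are illustrative
%%% assumptions, not Mariposa's actual protocol):
%%%
%%%     def choose_site(bids, deadline):
%%%         """Pick the cheapest bid whose promised delay meets the
%%%         deadline; bids maps site -> (price, delay)."""
%%%         feasible = [(price, delay, site)
%%%                     for site, (price, delay) in bids.items()
%%%                     if delay <= deadline]
%%%         return min(feasible)[2] if feasible else None
%%%
%%%     bids = {"siteA": (10.0, 5.0),
%%%             "siteB": (4.0, 9.0),
%%%             "siteC": (7.0, 3.0)}
%%%     assert choose_site(bids, deadline=6.0) == "siteC"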

@Article{Harris:1996:JAC,
  author =       "Evan P. Harris and Kotagiri Ramamohanarao",
  title =        "Join Algorithm Costs Revisited",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "1",
  pages =        "64--84",
  month =        jan,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:36 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Harris:Evan_P=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamohanarao:Kotagiri.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005001/60050064.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050064.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050064.ps.gz",
  abstract =     "A method of analysing join algorithms based upon the
                 time required to access, transfer and perform the
                 relevant CPU-based operations on a disk page is
                 proposed. The costs of variations of several of the
                 standard join algorithms, including nested block,
                 sort-merge, GRACE hash and hybrid hash, are presented.
                 For a given total buffer size, the cost of these join
                 algorithms depends on the parts of the buffer allocated
                 for each purpose. For example, when joining two
                 relations using the nested block join algorithm, the
                 amount of buffer space allocated for the outer and
                 inner relations can significantly affect the cost of
                 the join. Analysis of expected and experimental results
                 of various join algorithms show that a combination of
                 the optimal nested block and optimal GRACE hash join
                 algorithms usually provide the greatest cost benefit,
                 unless the relation size is a small multiple of the
                 memory size. Algorithms to quickly determine a buffer
                 allocation producing the minimal cost for each of these
                 algorithms are presented. When the relation size is a
                 small multiple of the amount of main memory available
                 (typically up to three to six times), the hybrid hash
                 join algorithm is preferable.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "join algorithms; minimisation; optimal buffer
                 allocation",
}
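
%%% The Harris--Ramamohanarao abstract stresses that, for a fixed
%%% total buffer, how the buffer is split between the outer and inner
%%% relations changes the join cost.  The textbook nested-block I/O
%%% count makes the point (a simplification, not the paper's per-page
%%% access/transfer/CPU cost model):
%%%
%%%     from math import ceil
%%%
%%%     def nested_block_cost(outer_pages, inner_pages, buffer_pages):
%%%         """Read the outer once; rescan the inner once per outer
%%%         chunk.  One page is reserved for the inner, leaving
%%%         buffer_pages - 1 for the outer chunk."""
%%%         chunk = max(buffer_pages - 1, 1)
%%%         return outer_pages + ceil(outer_pages / chunk) * inner_pages
%%%
%%%     # Same relations and buffer, different roles, different cost:
%%%     print(nested_block_cost(1000, 200, 52))   # 5000 page I/Os
%%%     print(nested_block_cost(200, 1000, 52))   # 4200 page I/Os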

@Article{Ramamritham:1996:TCC,
  author =       "Krithi Ramamritham and Panos K. Chrysanthis",
  title =        "A taxonomy of correctness criteria in database
                 applications",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "1",
  pages =        "85--97",
  month =        jan,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:36 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chrysanthis:Panos_K=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramamritham:Krithi.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005001/60050085.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050085.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005001/60050085.ps.gz",
  abstract =     "Whereas serializability captures {\em database
                 consistency requirements\/} and {\em transaction
                 correctness properties\/} via a single notion, recent
                 research has attempted to come up with correctness
                 criteria that view these two types of requirements
                 independently. The search for more flexible correctness
                 criteria is partly motivated by the introduction of new
                 transaction models that extend the traditional atomic
                 transaction model. These extensions came about because
                 the atomic transaction model in conjunction with
                 serializability is found to be very constraining when
                 used in advanced applications (e.g., design databases)
                 that function in distributed, cooperative, and
                 heterogeneous environments. In this article we develop
                 a taxonomy of various {\em correctness criteria\/} that
                 focus on database consistency requirements and
                 transaction correctness properties from the viewpoint
                 of {\em what\/} the different dimensions of these two
                 are. This taxonomy allows us to categorize correctness
                 criteria that have been proposed in the literature. To
                 help in this categorization, we have applied a uniform
                 specification technique, based on ACTA, to express the
                 various criteria. Such a categorization helps shed
                 light on the similarities and differences between
                 different criteria and places them in perspective.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrency control; database correctness criteria;
                 formal specifications; transaction processing",
}

@Article{Tsatalos:1996:GVT,
  author =       "Odysseas G. Tsatalos and Marvin H. Solomon and Yannis
                 E. Ioannidis",
  title =        "The {GMAP}: a Versatile Tool for Physical Data
                 Independence",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "2",
  pages =        "101--118",
  month =        apr,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:38 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Ioannidis:Yannis_E=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Solomon:Marvin_H=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tsatalos:Odysseas_G=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005002/60050101.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005002/60050101.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005002/60050101.ps.gz",
  abstract =     "Physical data independence is touted as a central
                 feature of modern database systems. It allows users to
                 frame queries in terms of the logical structure of the
                 data, letting a query processor automatically translate
                 them into optimal plans that access physical storage
                 structures. Both relational and object-oriented
                 systems, however, force users to frame their queries in
                 terms of a logical schema that is directly tied to
                 physical structures. We present an approach that
                 eliminates this dependence. All storage structures are
                 defined in a declarative language based on relational
                 algebra as functions of a logical schema. We present an
                 algorithm, integrated with a conventional query
                 optimizer, that translates queries over this logical
                 schema into plans that access the storage structures.
                 We also show how to compile update requests into plans
                 that update all relevant storage structures
                 consistently and optimally. Finally, we report on
                 experiments with a prototype implementation of our
                 approach that demonstrate how it allows storage
                 structures to be tuned to the expected or observed
                 workload to achieve significantly better performance
                 than is possible with conventional techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "indexing; materialized views; physical data
                 independence; physical database design",
}

@Article{Poulovassilis:1996:AQO,
  author =       "Alexandra Poulovassilis and Carol Small",
  title =        "Algebraic Query Optimisation for Database Programming
                 Languages",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "2",
  pages =        "119--132",
  month =        apr,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:38 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Poulovassilis:Alexandra.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Small:Carol.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005002/60050119.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005002/60050119.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005002/60050119.ps.gz",
  abstract =     "A major challenge still facing the designers and
                 implementors of database programming languages (DBPLs)
                 is that of query optimisation. We investigate algebraic
                 query optimisation techniques for DBPLs in the context
                 of a purely declarative functional language that
                 supports sets as first-class objects. Since the
                 language is computationally complete issues such as
                 non-termination of expressions and construction of
                 infinite data structures can be investigated, whilst
                 its declarative nature allows the issue of side effects
                 to be avoided and a richer set of equivalences to be
                 developed. The language has a well-defined semantics
                 which permits us to reason formally about the
                 properties of expressions, such as their equivalence
                 with other expressions and their termination. The
                 support of a set bulk data type enables much prior work
                 on the optimisation of relational languages to be
                 utilised. In the paper we first give the syntax of our
                 archetypal DBPL and briefly discuss its semantics. We
                 then define a small but powerful algebra of operators
                 over the set data type, provide some key equivalences
                 for expressions in these operators, and list
                 transformation principles for optimising expressions.
                 Along the way, we identify some caveats to well-known
                 equivalences for non-deductive database languages. We
                 next extend our language with two higher level
                 constructs commonly found in functional DBPLs: set
                 comprehensions and functions with known inverses. Some
                 key equivalences for these constructs are provided, as
                 are transformation principles for expressions in them.
                 Finally, we investigate extending our equivalences for
                 the set operators to the analogous operators over bags.
                 Although developed and formally proved in the context
                 of a functional language, our findings are directly
                 applicable to other DBPLs of similar expressiveness.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "algebraic manipulation; database management; database
                 programming languages; functional languages; query
                 optimisation",
}
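
%%% One of the caveats the Poulovassilis--Small abstract alludes to is
%%% that equivalences proved for set operators need not carry over to
%%% the analogous operators over bags.  A tiny Python illustration
%%% (not taken from the paper):
%%%
%%%     from collections import Counter
%%%
%%%     # Set union is idempotent, so a rewrite may safely duplicate
%%%     # an operand; bag union is not, so the same rewrite is unsound.
%%%     s = {1, 2}
%%%     assert s | s == s
%%%     b = Counter([1, 2])
%%%     assert b + b != b          # duplicates are counted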

@Article{Amiel:1996:TSR,
  author =       "Eric Amiel and Marie-Jo Bellosta and Eric Dujardin and
                 Eric Simon",
  title =        "Type-safe Relaxing of Schema Consistency Rules for
                 Flexible Modeling in {OODBMS}",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "2",
  pages =        "133--150",
  month =        apr,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:38 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Amiel:Eric.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Bellosta:Marie=Jo.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dujardin:Eric.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Simon:Eric.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005002/60050133.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005002/60050133.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005002/60050133.ps.gz",
  abstract =     "Object-oriented databases enforce behavioral schema
                 consistency rules to guarantee type safety, i.e., that
                 no run-time type error can occur. When the schema must
                 evolve, some schema updates may violate these rules. In
                 order to maintain behavioral schema consistency,
                 traditional solutions require significant changes to
                 the types, the type hierarchy and the code of existing
                 methods. Such operations are very expensive in a
                 database context. To ease schema evolution, we propose
                 to support exceptions to the behavioral consistency
                 rules without sacrificing type safety. The basic idea
                 is to detect unsafe statements in a method code at
                 compile-time and check them at run-time. The run-time
                 check is performed by a specific clause that is
                 automatically inserted around unsafe statements. This
                 check clause warns the programmer of the safety problem
                 and lets him provide exception-handling code. Schema
                 updates can therefore be performed with only minor
                 changes to the code of methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "contravariance; covariance; object-oriented databases;
                 schema evolution; type safety",
  xxtitle =      "Type-safe relaxing of schema consistency rules for
                 flexible modelling in {OODBMS}",
}
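
%%% The Amiel et al. mechanism compiles a run-time check clause around
%%% each statically unsafe statement.  A Python sketch of the shape of
%%% such a guard (the class names and handler protocol are
%%% illustrative assumptions):
%%%
%%%     class Bird:
%%%         def fly(self):
%%%             return "flying"
%%%
%%%     class Penguin:         # receiver for which fly() is unsafe
%%%         pass
%%%
%%%     def check_clause(obj, expected, stmt, handler):
%%%         """Execute the unsafe statement only if the receiver has
%%%         the expected type at run time; otherwise run the
%%%         programmer's exception-handling code."""
%%%         return stmt(obj) if isinstance(obj, expected) else handler(obj)
%%%
%%%     for animal in (Bird(), Penguin()):
%%%         print(check_clause(animal, Bird,
%%%                            lambda b: b.fly(),
%%%                            lambda o: "type error handled"))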

@Article{Fang:1996:EOB,
  author =       "Doug Fang and Shahram Ghandeharizadeh and Dennis
                 McLeod",
  title =        "An experimental object-based sharing system for
                 networked databases",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "2",
  pages =        "151--165",
  month =        apr,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:38 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fang:Doug.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Ghandeharizadeh:Shahram.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005002/60050151.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005002/60050151.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005002/60050151.ps.gz",
  abstract =     "An approach and mechanism for the transparent sharing
                 of objects in an environment of interconnected
                 (networked), autonomous database systems is presented.
                 An experimental prototype system has been designed and
                 implemented, and an analysis of its performance
                 conducted. Previous approaches to sharing in this
                 environment typically rely on the use of a global,
                 integrated conceptual database schema; users and
                 applications must pose queries at this new level of
                 abstraction to access remote information. By contrast,
                 our approach provides a mechanism that allows users to
                 import remote objects directly into their local
                 database transparently; access to remote objects is
                 virtually the same as access to local objects. The
                 experimental prototype system that has been designed
                 and implemented is based on the Iris and Omega
                 object-based database management systems; this system
                 supports the sharing of data and meta-data objects
                 (information units) as well as units of behavior. The
                 results of experiments conducted to evaluate the
                 performance of our mechanism demonstrate the
                 feasibility of database transparent object sharing in a
                 federated environment, and provide insight into the
                 performance overhead and tradeoffs involved.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database system interoperability; experimental
                 prototype benchmarking; object sharing",
  xxtitle =      "An Experimental System for Object-Based Sharing in
                 Federated Databases",
}

@Article{Dey:1996:CTR,
  author =       "Debabrata Dey and Terence M. Barron and Veda C.
                 Storey",
  title =        "A Complete Temporal Relational Algebra",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "3",
  pages =        "167--180",
  month =        aug,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:39 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Barron:Terence_M=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dey:Debabrata.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Storey:Veda_C=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005003/60050167.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005003/60050167.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005003/60050167.ps.gz",
  abstract =     "Various temporal extensions to the relational model
                 have been proposed. All of these, however, deviate
                 significantly from the original relational model. This
                 paper presents a temporal extension of the relational
                 algebra that is not significantly different from the
                 original relational model, yet is at least as
                 expressive as any of the previous approaches. This
                 algebra employs multidimensional tuple time-stamping to
                 capture the complete temporal behavior of data. The
                 basic relational operations are redefined as consistent
                 extensions of the existing operations in a manner that
                 preserves the basic algebraic equivalences of the
                 snapshot (i.e., conventional static) algebra. A new
                 operation, namely {\em temporal projection}, is
                 introduced. The complete update semantics are formally
                 specified and aggregate functions are defined. The
                 algebra is closed, and reduces to the snapshot algebra.
                 It is also shown to be at least as expressive as the
                 calculus-based temporal query language TQuel. In order
                 to assess the algebra, it is evaluated using a set of
                 twenty-six criteria proposed in the literature, and
                 compared to existing temporal relational algebras. The
                 proposed algebra appears to satisfy more criteria than
                 any other existing algebra.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "historical databases; relational algebra; temporal
                 databases; transaction time; valid time",
  remark =       "Check month: July or August??",
}
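
%%% The new operation in the Dey--Barron--Storey algebra is temporal
%%% projection.  A Python sketch of the usual reading -- project, then
%%% coalesce value-equivalent tuples whose [start, end) periods
%%% overlap or meet (the flat row format is an assumption, not the
%%% paper's multidimensional time-stamps):
%%%
%%%     def temporal_project(rows, attrs):
%%%         by_value = {}
%%%         for r in rows:
%%%             key = tuple(r[a] for a in attrs)
%%%             by_value.setdefault(key, []).append((r["start"], r["end"]))
%%%         out = []
%%%         for key, ivals in by_value.items():
%%%             ivals.sort()
%%%             cur_s, cur_e = ivals[0]
%%%             for s, e in ivals[1:]:
%%%                 if s <= cur_e:            # overlap or adjacency
%%%                     cur_e = max(cur_e, e)
%%%                 else:
%%%                     out.append(key + (cur_s, cur_e))
%%%                     cur_s, cur_e = s, e
%%%             out.append(key + (cur_s, cur_e))
%%%         return out
%%%
%%%     rows = [{"name": "ann", "dept": "db", "start": 1, "end": 4},
%%%             {"name": "ann", "dept": "ai", "start": 4, "end": 7}]
%%%     assert temporal_project(rows, ("name",)) == [("ann", 1, 7)]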

@Article{Shyy:1996:DIK,
  author =       "Yuh-Ming Shyy and Javier Arroyo and Stanley Y. W. Su
                 and Herman Lam",
  title =        "The Design and Implementation of {K}: a High-Level
                 Knowledge-Base Programming Language of {OSAM*.KBMS}",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "3",
  pages =        "181--195",
  month =        aug,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:39 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Arroyo:Javier.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lam:Herman.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shyy:Yuh=Ming.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Su:Stanley_Y=_W=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005003/60050181.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005003/60050181.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005003/60050181.ps.gz",
  abstract =     "The OSAM*.KBMS is a knowledge-base management system,
                 or the so-called next-generation database management
                 system, for non-traditional data/knowledge-intensive
                 applications. In order to define, query, and manipulate
                 a knowledge base, as well as to write codes to
                 implement any application system, we have developed an
                 object-oriented knowledge-base programming language
                 called K to serve as the high-level interface of
                 OSAM*.KBMS. This paper presents the design of K, its
                 implementation, and its supporting KBMS developed at
                 the Database Systems Research and Development Center of
                 the University of Florida.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "abstractions; association patterns; knowledge-base
                 programming language; object-oriented knowledge model;
                 structural associations",
  remark =       "Check month: July or August??",
}

@Article{Harder:1996:APS,
  author =       "Theo H{\"a}rder and Joachim Reinert",
  title =        "Access Path Support for Referential Integrity in
                 {SQL2}",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "3",
  pages =        "196--214",
  month =        aug,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:39 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/H=auml=rder:Theo.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Reinert:Joachim.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005003/60050196.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005003/60050196.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005003/60050196.ps.gz",
  abstract =     "The relational model of data incorporates fundamental
                 assertions for entity integrity and referential
                 integrity. Recently, these so-called relational
                 invariants were more precisely specified by the new
                 SQL2 standard. Accordingly, they have to be guaranteed
                 by a relational DBMS to its users and, therefore, all
                 issues of semantics and implementation became very
                 important. The specification of referential integrity
                 embodies quite a number of complications including the
                 MATCH clause and a collection of referential actions.
                 In particular, $\hbox{{\tt MATCH PARTIAL}}$ turns out
                 to be hard to understand and, if applied, difficult and
                 expensive to maintain. In this paper, we identify the
                 functional requirements for preserving referential
                 integrity. At a level free of implementational
                 considerations, the number and kinds of searches
                 necessary for referential integrity maintenance are
                 derived. Based on these findings, our investigation is
                 focused on the question of how the functional
                 requirements can be supported by implementation
                 concepts in an efficient way. We determine the search
                 cost for referential integrity maintenance (in terms of
                 page references) for various possible access path
                 structures. Our main result is that a combined access
                 path structure is the most appropriate for checking the
                 regular MATCH option, whereas $\hbox{{\tt MATCH
                 PARTIAL}}$ requires very expensive and complicated
                 check procedures. If it cannot be avoided at all, the
                 best support is achieved by a combination of multiple
                 $\mbox{B}^*$-trees.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access path support; MATCH clause; referential
                 integrity; relational databases; SQL2",
  remark =       "Check month: July or August??",
}
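
%%% Why MATCH PARTIAL is expensive to maintain, per the abstract
%%% above: a partially NULL foreign key is valid iff some referenced
%%% row agrees on every non-NULL component, so each NULL pattern can
%%% demand a different search.  A Python rendering of the SQL2 rule:
%%%
%%%     def match_partial_ok(fk, referenced_keys):
%%%         """SQL2 MATCH PARTIAL: all-NULL keys are valid; otherwise
%%%         some referenced key must agree on each non-NULL part."""
%%%         if all(v is None for v in fk):
%%%             return True
%%%         return any(all(f is None or f == p
%%%                        for f, p in zip(fk, pk))
%%%                    for pk in referenced_keys)
%%%
%%%     pks = [(1, "a"), (2, "b")]
%%%     assert match_partial_ok((1, None), pks)      # matches (1, "a")
%%%     assert not match_partial_ok((3, None), pks)  # no witness row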

@Article{Ooi:1996:INE,
  author =       "Beng Chin Ooi and Jiawei Han and Hongjun Lu and Kian
                 Lee Tan",
  title =        "Index Nesting --- An Efficient Approach to Indexing in
                 Object-Oriented Databases",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "3",
  pages =        "215--228",
  month =        aug,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:39 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Han:Jiawei.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lu:Hongjun.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Ooi:Beng_Chin.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tan:Kian=Lee.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005003/60050215.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005003/60050215.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005003/60050215.ps.gz",
  abstract =     "In object-oriented database systems where the concept
                 of the superclass-subclass is supported, an instance of
                 a subclass is also an instance of its superclass.
                 Consequently, the access scope of a query against a
                 class in general includes the access scope of all its
                 subclasses, unless specified otherwise. An index to
                 support superclass-subclass relationship efficiently
                 must provide efficient associative retrievals of
                 objects from a single class or from several classes in
                 a class hierarchy. This paper presents an efficient
                 index called the hierarchical tree (the H-tree). For
                 each class, an H-tree is maintained, allowing efficient
                 search on a single class. These H-trees are
                 appropriately linked to capture the superclass-subclass
                 relationships, thus allowing efficient retrievals of
                 instances from a class hierarchy. Both experimental and
                 analytical results indicate that the H-tree is an
                 efficient indexing structure.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "indexing structures; OODB; query retrieval",
  remark =       "Check month: July or August??",
}
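
%%% The H-tree idea in the Ooi et al. abstract: one index per class,
%%% with the indexes linked along superclass-subclass edges so a
%%% hierarchy query unions the per-class lookups.  A Python sketch
%%% with a dict standing in for each per-class tree (the structure and
%%% names are illustrative, not the paper's H-tree layout):
%%%
%%%     class ClassIndex:
%%%         def __init__(self, name):
%%%             self.name = name
%%%             self.entries = {}        # key -> set of object ids
%%%             self.subclasses = []     # linked subclass indexes
%%%
%%%         def insert(self, key, oid):
%%%             self.entries.setdefault(key, set()).add(oid)
%%%
%%%         def lookup(self, key, include_subclasses=True):
%%%             hits = set(self.entries.get(key, set()))
%%%             if include_subclasses:
%%%                 for sub in self.subclasses:
%%%                     hits |= sub.lookup(key)
%%%             return hits
%%%
%%%     person, student = ClassIndex("Person"), ClassIndex("Student")
%%%     person.subclasses.append(student)
%%%     person.insert(30, "p1")
%%%     student.insert(30, "s1")
%%%     assert person.lookup(30) == {"p1", "s1"}
%%%     assert person.lookup(30, include_subclasses=False) == {"p1"}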

@Article{Antoshenkov:1996:QPO,
  author =       "Gennady Antoshenkov and Mohamed Ziauddin",
  title =        "Query Processing and Optimization in {Oracle Rdb}",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "4",
  pages =        "229--237",
  month =        dec,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:39 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Antoshenkov:Gennady.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Ziauddin:Mohamed.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005004/60050229.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005004/60050229.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005004/60050229.ps.gz",
  abstract =     "This paper contains an overview of the technology used
                 in the query processing and optimization component of
                 Oracle Rdb, a relational database management system
                 originally developed by Digital Equipment Corporation
                 and now under development by Oracle Corporation. Oracle
                 Rdb is a production system that supports the most
                 demanding database applications, runs on multiple
                 platforms and in a variety of environments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "dynamic optimization; optimizer; query transformation;
                 relational database; sampling",
}

@Article{Mylopoulos:1996:BKB,
  author =       "John Mylopoulos and Vinay K. Chaudhri and Dimitris
                 Plexousakis and Adel Shrufi and Thodoros Topaloglou",
  title =        "Building Knowledge Base Management Systems",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "4",
  pages =        "238--263",
  month =        dec,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:39 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chaudhri:Vinay_K=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mylopoulos:John.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Plexousakis:Dimitris.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shrufi:Adel.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Topaloglou:Thodoros.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005004/60050238.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005004/60050238.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005004/60050238.ps.gz",
  abstract =     "Advanced applications in fields such as CAD, software
                 engineering, real-time process control, corporate
                 repositories and digital libraries require the
                 construction, efficient access and management of large,
                 shared knowledge bases. Such knowledge bases cannot be
                 built using existing tools such as expert system
                 shells, because these do not scale up, nor can they be
                 built in terms of existing database technology, because
                 such technology does not support the rich
                 representational structure and inference mechanisms
                 required for knowledge-based systems. This paper
                 proposes a generic architecture for a knowledge base
                 management system intended for such applications. The
                 architecture assumes an object-oriented knowledge
                 representation language with an assertional sublanguage
                 used to express constraints and rules. It also provides
                 for general-purpose deductive inference and
                 special-purpose temporal reasoning. Results reported in
                 the paper address several knowledge base management
                 issues. For storage management, a new method is
                 proposed for generating a logical schema for a given
                 knowledge base. Query processing algorithms are offered
                 for semantic and physical query optimization, along
                 with an enhanced cost model for query cost estimation.
                 On concurrency control, the paper describes a novel
                 concurrency control policy which takes advantage of
                 knowledge base structure and is shown to outperform
                 two-phase locking for highly structured knowledge bases
                 and update-intensive transactions. Finally, algorithms
                 for compilation and efficient processing of constraints
                 and rules during knowledge base operations are
                 described. The paper presents original results,
                 including novel data structures and algorithms, as well
                 as preliminary performance evaluation data. Based on
                 these results, we conclude that knowledge base
                 management systems which can accommodate large
                 knowledge bases are feasible.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrency control; constraint enforcement; knowledge
                 base management systems; rule management; storage
                 management",
}

@Article{Becker:1996:AOM,
  author =       "Bruno Becker and Stephan Gschwind and Thomas Ohler and
                 Bernhard Seeger and Peter Widmayer",
  title =        "An Asymptotically Optimal Multiversion {B}-Tree",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "4",
  pages =        "264--275",
  month =        dec,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:39 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Becker:Bruno.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gschwind:Stephan.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Ohler:Thomas.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Seeger:Bernhard.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Widmayer:Peter.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005004/60050264.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005004/60050264.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005004/60050264.ps.gz",
  abstract =     "In a variety of applications, we need to keep track of
                 the development of a data set over time. For
                 maintaining and querying these multiversion data
                 efficiently, external storage structures are an
                 absolute necessity. We propose a multiversion B-tree
                 that supports insertions and deletions of data items at
                 the current version and range queries and exact match
                 queries for any version, current or past. Our
                 multiversion B-tree is asymptotically optimal in the
                 sense that the time and space bounds are asymptotically
                 the same as those of the (single-version) B-tree in the
                 worst case. The technique we present for transforming a
                 (single-version) B-tree into a multiversion B-tree is
                 quite general: it applies to a number of hierarchical
                 external access structures with certain properties
                 directly, and it can be modified for others.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access methods; information systems; physical design;
                 versioned data",
}
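
%%% The Becker et al. abstract describes an index in which updates
%%% apply to the current version while queries may address any
%%% version, current or past.  A toy Python sketch of that behavior,
%%% using [born, died) version intervals per entry (a stand-in with
%%% none of the paper's asymptotic guarantees):
%%%
%%%     class MultiversionIndex:
%%%         def __init__(self):
%%%             self.entries = {}   # key -> list of [born, died, value]
%%%             self.current = 0
%%%
%%%         def insert(self, key, value):
%%%             self.current += 1
%%%             self.entries.setdefault(key, []).append(
%%%                 [self.current, None, value])
%%%
%%%         def delete(self, key):
%%%             self.current += 1
%%%             for e in self.entries.get(key, []):
%%%                 if e[1] is None:
%%%                     e[1] = self.current   # close the live interval
%%%
%%%         def lookup(self, key, version=None):
%%%             v = self.current if version is None else version
%%%             for born, died, value in self.entries.get(key, []):
%%%                 if born <= v and (died is None or v < died):
%%%                     return value
%%%             return None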

@Article{Kashyap:1996:SSS,
  author =       "Vipul Kashyap and Amit P. Sheth",
  title =        "Semantic and Schematic Similarities Between Database
                 Objects: a Context-Based Approach",
  journal =      j-VLDB-J,
  volume =       "5",
  number =       "4",
  pages =        "276--304",
  month =        dec,
  year =         "1996",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:39 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb5.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t6005004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kashyap:Vipul.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sheth:Amit_P=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/6005004/60050276.htm;
                 http://link.springer.de/link/service/journals/00778/papers/6005004/60050276.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/6005004/60050276.ps.gz",
  abstract =     "In a multidatabase system, schematic conflicts between
                 two objects are usually of interest only when the
                 objects have some semantic similarity. We use the
                 concept of {\em semantic proximity}, which is
                 essentially an {\em abstraction/mapping\/} between the
                 domains of the two objects associated with the {\em
                 context of comparison}. An explicit though partial
                 context representation is proposed and the specificity
                 relationship between contexts is defined. The contexts
                 are organized as a meet semi-lattice and associated
                 operations like the greatest lower bound are defined.
                 The context of comparison and the type of abstractions
                 used to relate the two objects form the basis of a
                 semantic taxonomy. At the {\em semantic level}, the
                 intensional description of database objects provided by
                 the context is expressed using description logics. The
                 terms used to construct the contexts are obtained from
                 {\em domain-specific ontologies}. {\em Schema
                 correspondences\/} are used to store mappings from the
                 semantic level to the data level and are associated
                 with the respective contexts. Inferences about database
                 content at the federation level are modeled as changes
                 in the context and the associated schema
                 correspondences. We try to reconcile the dual
                 (schematic and semantic) perspectives by enumerating
                 {\em possible semantic similarities\/} between objects
                 having schema and data conflicts, and modeling schema
                 correspondences as the projection of semantic proximity
                 {\em with respect to (wrt)\/} context.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
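
%%% The abstract above organizes contexts as a meet semi-lattice with
%%% a greatest-lower-bound operation.  A toy Python sketch, treating
%%% a context as attribute-value constraints whose GLB is the merged
%%% (more specific) context, or undefined on conflict (hypothetical
%%% representation, not the paper's description-logic encoding):
%%%
%%%     def glb(c1, c2):
%%%         merged = dict(c1)
%%%         for attr, val in c2.items():
%%%             if attr in merged and merged[attr] != val:
%%%                 return None        # no common lower bound
%%%             merged[attr] = val
%%%         return merged
%%%
%%%     c1 = {"domain": "medicine", "region": "EU"}
%%%     c2 = {"domain": "medicine", "language": "en"}
%%%     print(glb(c1, c2))   # most general context refining both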

@Article{Evangelidis:1997:HTM,
  author =       "Georgios Evangelidis and David B. Lomet and Betty
                 Salzberg",
  title =        "The {hB} $^{\Pi}$-tree: a multi-attribute index
                 supporting concurrency, recovery and node
                 consolidation",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "1",
  pages =        "1--25",
  month =        feb,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:40 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Evangelidis:Georgios.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lomet:David_B=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Salzberg:Betty.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006001/70060001.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006001/70060001.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/7006001/70060001.ps.gz",
  abstract =     "We propose a new multi-attribute index. Our approach
                 combines the hB-tree, a multi-attribute index, and the
                 $\Pi$-tree, an abstract index which offers efficient
                 concurrency and recovery methods. We call the resulting
                 method the hB$^\Pi$-tree. We describe several versions
                 of the hB$^\Pi$-tree, each using a different
                 node-splitting and index-term-posting algorithm. We
                 also describe a new node deletion algorithm. We have
                 implemented all the versions of the hB$^\Pi$-tree. Our
                 performance results show that even the version that
                 offers no performance guarantees actually performs
                 very well in terms of storage utilization, index size
                 (fan-out), exact-match and range searching, under
                 various data types and distributions. We have also
                 shown that our index is fairly insensitive to increases
                 in dimension. Thus, it is suitable for indexing
                 high-dimensional applications. This property and the
                 fact that all our versions of the hB$^\Pi$-tree can
                 use the $\Pi$-tree concurrency and recovery algorithms
                 make the hB$^\Pi$-tree a promising candidate for
                 inclusion in a general-purpose DBMS.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrency; multi-attribute index; node
                 consolidation; recovery",
  remark =       "Check month: January or February??",
}

@Article{Antoshenkov:1997:DBO,
  author =       "Gennady Antoshenkov",
  title =        "Dictionary-based order-preserving string compression
                 (*)",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "1",
  pages =        "26--39",
  month =        feb,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:40 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Antoshenkov:Gennady.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006001/70060026.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006001/70060026.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/7006001/70060026.ps.gz",
  abstract =     "As no database exists without indexes, no index
                 implementation exists without order-preserving key
                 compression, in particular, without prefix and tail
                 compression. However, despite the great potential for
                 making indexes smaller and faster, the application of
                 general compression methods to ordered data sets has
                 advanced very little. This paper demonstrates that the
                 fast dictionary-based methods can be applied to
                 order-preserving compression almost with the same
                 freedom as in the general case. The proposed new
                 technology has the same speed and a compression rate
                 only marginally lower than the traditional
                 order-indifferent dictionary encoding. Procedures for
                 encoding and generating the encode tables are described
                 covering such order-related features as ordered data
                 set restrictions, sensitivity and insensitivity to a
                 character position, and one-symbol encoding of each
                 frequent trailing character sequence. The experimental
                 results presented demonstrate fivefold compression on
                 real-life data sets and twelvefold compression on
                 Wisconsin benchmark text fields.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "indexing; order-preserving key compression",
  remark =       "Check month: January or February??",
}
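
%%% The abstract above turns on the order-preserving property:
%%% encoded keys must compare the same way as the original strings.
%%% A minimal Python sketch of an order-preserving dictionary
%%% encoding (whole values, fixed-width codes; far simpler than the
%%% paper's substring-dictionary method):
%%%
%%%     def order_preserving_codes(values):
%%%         ranked = {v: i for i, v in enumerate(sorted(set(values)))}
%%%         width = max(1, (len(ranked).bit_length() + 7) // 8)
%%%         return {v: i.to_bytes(width, "big") for v, i in ranked.items()}
%%%
%%%     enc = order_preserving_codes(["apple", "banana", "cherry"])
%%%     assert enc["apple"] < enc["banana"] < enc["cherry"]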

@Article{Singhal:1997:ALB,
  author =       "Vigyan Singhal and Alan Jay Smith",
  title =        "Analysis of Locking Behavior in Three Real Database
                 Systems",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "1",
  pages =        "40--52",
  month =        feb,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:40 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Singhal:Vigyan.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Smith:Alan_Jay.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006001/70060040.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006001/70060040.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/7006001/70060040.ps.gz",
  abstract =     "Concurrency control is essential to the correct
                 functioning of a database due to the need for correct,
                 reproducible results. For this reason, and because
                 concurrency control is a well-formulated problem, there
                 has developed an enormous body of literature studying
                 the performance of concurrency control algorithms. Most
                 of this literature uses either analytic modeling or
                 random number-driven simulation, and explicitly or
                 implicitly makes certain assumptions about the behavior
                 of transactions and the patterns by which they set and
                 unset locks. Because of the difficulty of collecting
                 suitable measurements, there have been only a few
                 studies which use trace-driven simulation, and still
                 less work has been directed toward characterizing the
                 concurrency control behavior of real workloads. In this
                 paper, we present a study of three database workloads,
                 all taken from IBM DB2 relational database systems
                 running commercial applications in a production
                 environment. This study considers topics such as
                 frequency of locking and unlocking, deadlock and
                 blocking, duration of locks, types of locks,
                 correlations between applications of lock types,
                 two-phase versus non-two-phase locking, when locks are
                 held and released, etc. In each case, we evaluate the
                 behavior of the workload relative to the assumptions
                 commonly made in the research literature and discuss
                 the extent to which those assumptions may or may not
                 lead to erroneous conclusions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrency control; trace-driven simulation; workload
                 characterization",
  remark =       "Check month: January or February??",
}
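
%%% The study above characterizes locking behavior from traces.  A
%%% minimal Python sketch of the kind of statistic involved,
%%% computing average lock-hold durations from a hypothetical trace
%%% of (txn, lock, acquire_time, release_time) records:
%%%
%%%     from collections import defaultdict
%%%
%%%     def mean_hold_times(trace):
%%%         per_lock = defaultdict(list)
%%%         for txn, lock, t_acq, t_rel in trace:
%%%             per_lock[lock].append(t_rel - t_acq)
%%%         return {lock: sum(ts) / len(ts)
%%%                 for lock, ts in per_lock.items()}
%%%
%%%     trace = [("T1", "page7", 0.0, 1.5), ("T2", "page7", 2.0, 2.1)]
%%%     print(mean_hold_times(trace))   # average hold time per lock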

@Article{Mehta:1997:DPS,
  author =       "Manish Mehta and David J. DeWitt",
  title =        "Data placement in shared-nothing parallel database
                 systems (*)",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "1",
  pages =        "53--72",
  month =        feb,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:40 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/DeWitt:David_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mehta:Manish.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006001/70060053.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006001/70060053.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/7006001/70060053.ps.gz",
  abstract =     "Data placement in shared-nothing database systems has
                 been studied extensively in the past and various
                 placement algorithms have been proposed. However, there
                 is no consensus on the most efficient data placement
                 algorithm and placement is still performed manually by
                 a database administrator with periodic reorganization
                 to correct mistakes. This paper presents the first
                 comprehensive simulation study of data placement issues
                 in a shared-nothing system. The results show that
                 current hardware technology trends have significantly
                 changed the performance tradeoffs considered in past
                 studies. A simple data placement strategy based on
                 the new results is developed and shown to perform well
                 for a variety of workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "declustering; disk allocation; resource allocation;
                 resource scheduling",
  remark =       "Check month: January or February??",
}
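
%%% Data placement in a shared-nothing system assigns tuples to
%%% nodes.  A minimal Python sketch of hash declustering, one common
%%% strategy studied in this literature (illustrative only, not the
%%% strategy proposed in the paper):
%%%
%%%     def hash_decluster(tuples, key, n_nodes):
%%%         partitions = [[] for _ in range(n_nodes)]
%%%         for t in tuples:
%%%             partitions[hash(t[key]) % n_nodes].append(t)
%%%         return partitions
%%%
%%%     rows = [{"id": i, "val": 10 * i} for i in range(8)]
%%%     parts = hash_decluster(rows, "id", 3)   # 3-node placement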

@Article{Papazoglou:1997:DMO,
  author =       "Mike P. Papazoglou and Bernd J. Kr{\"a}mer",
  title =        "A Database Model for Object Dynamics",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "2",
  pages =        "73--96",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:41 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition. See erratum
                 \cite{Papazoglou:1997:EDM}.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kr=auml=mer:Bernd_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Papazoglou:Mike_P=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006002/70060073.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060073.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060073.ps.gz",
  abstract =     "To effectively model complex applications in which
                 constantly changing situations can be represented, a
                 database system must be able to support the runtime
                 specification of structural and behavioral nuances for
                 objects on an individual or group basis. This paper
                 introduces the role mechanism as an extension of
                 object-oriented databases to support unanticipated
                 behavioral oscillations for objects that may attain
                 many types and share a single object identity. A role
                 refers to the ability to represent object dynamics by
                 seamlessly integrating idiosyncratic behavior, possibly
                 in response to external events, with pre-existing
                 object behavior specified at instance creation time. In
                 this manner, the same object can simultaneously be an
                 instance of different classes which symbolize the
                 different roles that this object assumes. The role
                 concept and its underlying linguistic scheme simplify
                 the design requirements of complex applications that
                 need to create and manipulate dynamic objects.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "dynamic class hierarchy; dynamic object
                 re-classification; object migration; object role model;
                 object-oriented database systems",
  remark =       "Check month: May or August??",
}
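
%%% The role mechanism above lets a single object identity attain
%%% several types at run time.  A toy Python sketch of the idea
%%% (hypothetical API, not the paper's linguistic scheme):
%%%
%%%     class Role:
%%%         def __init__(self, **state):
%%%             self.__dict__.update(state)
%%%
%%%     class DynamicObject:
%%%         def __init__(self, oid):
%%%             self.oid = oid       # single, stable object identity
%%%             self.roles = {}
%%%
%%%         def add_role(self, name, **state):   # attained at run time
%%%             self.roles[name] = Role(**state)
%%%
%%%         def as_role(self, name):
%%%             return self.roles[name]
%%%
%%%     p = DynamicObject(oid=42)
%%%     p.add_role("Employee", salary=50000)
%%%     p.add_role("Student", university="X")
%%%     assert p.as_role("Employee").salary == 50000  # one id, two roles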

@Article{Catarci:1997:GIH,
  author =       "Tiziana Catarci and Giuseppe Santucci and John
                 Cardiff",
  title =        "Graphical interaction with heterogeneous databases
                 (*)",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "2",
  pages =        "97--120",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:41 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cardiff:John.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Catarci:Tiziana.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Santucci:Giuseppe.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006002/70060097.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060097.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060097.ps.gz",
  abstract =     "During the past few years our research efforts have
                 been inspired by two different needs. On one hand, the
                 number of non-expert users accessing databases is
                 growing apace. On the other, information systems will
                 no longer be characterized by a single centralized
                 architecture, but rather by several heterogeneous
                 component systems. In order to address such needs we
                 have designed a new query system with both
                 user-oriented and multidatabase features. The system's
                 main components are an adaptive visual interface,
                 providing the user with different and interchangeable
                 interaction modalities, and a ``translation layer'',
                 which creates and offers to the user the illusion of a
                 single homogeneous schema out of several heterogeneous
                 components. Both components are founded on a common
                 ground, i.e. a formally defined and semantically rich
                 data model, the Graph Model, and a minimal set of
                 Graphical Primitives, in terms of which general query
                 operations may be visually expressed. The Graph Model
                 has a visual syntax, so that graphical operations can
                 be applied on its components without unnecessary
                 mappings, and an object-based semantics. The aim of
                 this paper is twofold. We first present an overall view
                 of the system architecture and then give a
                 comprehensive description of the lower part of the
                 system itself. In particular, we show how schemata
                 expressed in different data models can be translated in
                 terms of the Graph Model, possibly by exploiting reverse
                 engineering techniques. Moreover, we show how mappings
                 can be established between well-known query languages
                 and the Graphical Primitives. Finally, we describe in
                 detail how queries expressed by using the Graphical
                 Primitives can be translated into relational
                 expressions so as to be processed by actual DBMSs.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  remark =       "Check month: May or August??",
}

@Article{Chen:1997:AHF,
  author =       "Ming-Syan Chen and Hui-I Hsiao and Philip S. Yu",
  title =        "On Applying Hash Filters to Improving the Execution of
                 Multi-Join Queries",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "2",
  pages =        "121--131",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:41 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chen:Ming=Syan.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hsiao:Hui=I.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Philip_S=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006002/70060121.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060121.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060121.ps.gz",
  abstract =     "In this paper, we explore an approach of interleaving
                 a bushy execution tree with hash filters to improve the
                 execution of multi-join queries. Similar to semi-joins
                 in distributed query processing, hash filters can be
                 applied to eliminate non-matching tuples from joining
                 relations before the execution of a join, thus reducing
                 the join cost. Note that hash filters built in
                 different execution stages of a bushy tree can have
                 different costs and effects. The effect of hash filters
                 is evaluated first. Then, an efficient scheme to
                 determine an effective sequence of hash filters for a
                 bushy execution tree is developed, where hash filters
                 are built and applied based on the join sequence
                 specified in the bushy tree so that not only is the
                 reduction effect optimized but also the cost associated
                 is minimized. Various schemes using hash filters are
                 implemented and evaluated via simulation. It is
                 experimentally shown that the application of hash
                 filters is in general a very powerful means to improve
                 the execution of multi-join queries, and the
                 improvement becomes more prominent as the number of
                 relations in a query increases.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "bushy trees; hash filters; parallel query processing;
                 sort-merge joins",
  remark =       "Check month: May or August??",
}
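
%%% Hash filters act like semi-joins: tuples of one relation that
%%% cannot match any join key of the other are dropped before the
%%% join executes.  A minimal Python sketch (bit-vector filter;
%%% illustrative only):
%%%
%%%     def build_filter(rel, col, n_bits=1024):
%%%         bits = [False] * n_bits
%%%         for row in rel:
%%%             bits[hash(row[col]) % n_bits] = True
%%%         return bits
%%%
%%%     def apply_filter(rel, col, bits):
%%%         # no false dismissals; occasional false passes are caught
%%%         # later by the join itself
%%%         return [r for r in rel if bits[hash(r[col]) % len(bits)]]
%%%
%%%     R = [{"a": 1}, {"a": 3}]
%%%     S = [{"a": 1}, {"a": 2}, {"a": 3}, {"a": 4}]
%%%     S_reduced = apply_filter(S, "a", build_filter(R, "a"))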

@Article{Ioannidis:1997:PQO,
  author =       "Yannis E. Ioannidis and Raymond T. Ng and Kyuseok Shim
                 and Timos K. Sellis",
  title =        "Parametric Query Optimization",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "2",
  pages =        "132--151",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:41 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Ioannidis:Yannis_E=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Ng:Raymond_T=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shim:Kyuseok.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006002/70060132.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060132.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060132.ps.gz",
  abstract =     "In most database systems, the values of many important
                 run-time parameters of the system, the data, or the
                 query are unknown at query optimization time.
                 Parametric query optimization attempts to identify at
                 compile time several execution plans, each one of which
                 is optimal for a subset of all possible values of the
                 run-time parameters. The goal is that at run time, when
                 the actual parameter values are known, the appropriate
                 plan should be identifiable with essentially no
                 overhead. We present a general formulation of this
                 problem and study it primarily for the buffer size
                 parameter. We adopt randomized algorithms as the main
                 approach to this style of optimization and enhance them
                 with a {\em sideways information passing\/} feature
                 that increases their effectiveness in the new task.
                 Experimental results of these enhanced algorithms show
                 that they optimize queries for large numbers of buffer
                 sizes in the same time needed by their conventional
                 versions for a single buffer size, without much
                 sacrifice in the output quality and with essentially
                 zero run-time overhead.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  remark =       "Check month: May or August??",
}
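
%%% Parametric optimization precomputes one plan per region of the
%%% run-time parameter space, so the run-time choice is a cheap
%%% lookup.  A minimal Python sketch for the buffer-size parameter
%%% (made-up breakpoints and plan names):
%%%
%%%     import bisect
%%%
%%%     breaks = [0, 64, 512, 4096]   # buffer sizes (pages) at which
%%%                                   # the optimal plan changes
%%%     plans = ["nested-loop", "sort-merge", "hybrid-hash", "hash"]
%%%
%%%     def pick_plan(buffer_pages):
%%%         # run-time step: essentially zero overhead
%%%         return plans[bisect.bisect_right(breaks, buffer_pages) - 1]
%%%
%%%     assert pick_plan(100) == "sort-merge"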

@Article{Mehrotra:1997:CCH,
  author =       "Sharad Mehrotra and Henry F. Korth and Avi
                 Silberschatz",
  title =        "Concurrency Control in Hierarchical Multidatabase
                 Systems",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "2",
  pages =        "152--172",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:41 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Korth:Henry_F=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mehrotra:Sharad.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006002/70060152.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060152.pdf;
                 http://link.springer.de/link/service/journals/00778/papers/7006002/70060152.ps.gz",
  abstract =     "Over the past decade, significant research has been
                 done towards developing transaction management
                 algorithms for multidatabase systems. Most of this work
                 assumes a monolithic architecture of the multidatabase
                 system with a single software module that follows a
                 single transaction management algorithm to ensure the
                 consistency of data stored in the local databases. This
                 monolithic architecture is not appropriate in a
                 multidatabase environment where the system spans
                 multiple different organizations that are distributed
                 over various geographically distant locations. In this
                 paper, we propose an alternative multidatabase
                 transaction management architecture, where the system
                 is hierarchical in nature. Hierarchical architecture
                 has consequences on the design of transaction
                 management algorithms. An implication of the
                 architecture is that the transaction management
                 algorithms followed by a multidatabase system must be
                 {\em composable\/} --- that is, it must be possible to
                 incorporate individual multidatabase systems as
                 elements in a larger multidatabase system. We present a
                 hierarchical architecture for a multidatabase
                 environment and develop techniques for concurrency
                 control in such systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrency control; database management; distributed
                 databases; multidatabase management",
  remark =       "Check month: May or August??",
  xxauthor =     "Sharad Mehrotra and Henry F. Korth and Abraham
                 Silberschatz",
}

@Article{Cobb:1997:IOT,
  author =       "Edward E. Cobb",
  title =        "The impact of object technology on commercial
                 transaction processing",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "3",
  pages =        "173--190",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:42 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Businesses today are searching for information
                 solutions that enable them to compete in the global
                 marketplace. To minimize risk, these solutions must
                 build on existing investments, permit the best
                 technology to be applied to the problem, and be
                 manageable. Object technology, with its promise of
                 improved productivity and quality in application
                 development, delivers these characteristics but, to
                 date, its deployment in commercial business
                 applications has been limited. One possible reason is
                 the absence of the transaction paradigm, widely used in
                 commercial environments and essential for reliable
                 business applications. For object technology to be a
                 serious contender in the construction of these
                 solutions requires: --- technology for transactional
                 objects. In December 1994, the Object Management Group
                 adopted a specification for an object {\em transaction
                 service\/} (OTS). The OTS specifies mechanisms for
                 defining and manipulating transactions. Though derived
                 from the X/Open distributed transaction processing
                 model, OTS contains additional enhancements
                 specifically designed for the object environment.
                 Similar technology from Microsoft appeared at the end
                 of 1995. --- methodologies for building new business
                 systems from existing parts. Business process
                 re-engineering is forcing businesses to improve their
                 operations which bring products to market. {\em
                 Workflow computing}, when used in conjunction with {\em
                 ``object wrappers''\/} provides tools to both define
                 and track execution of business processes which
                 leverage existing applications and infrastructure. ---
                 an execution environment which satisfies the
                 operational needs of the business.
                 Transaction processing (TP) monitor technology, though
                 widely accepted for mainframe transaction processing,
                 has yet to enjoy similar success in the client/server
                 marketplace. Instead the database vendors, with their
                 extensive tool suites, dominate. As object brokers
                 mature they will require many of the functions of
                 today's TP monitors. Marrying these two technologies
                 can produce a robust execution environment which offers
                 a superior alternative for building and deploying
                 client/server applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "objects; transaction processing; workflow",
}

@Article{Cobb:1997:ITC,
  author =       "Edward E. Cobb",
  title =        "The Impact of Technology on Commercial Transaction
                 Processing",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "3",
  pages =        "173--190",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 27 10:11:57 MDT 2000",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cobb:Edward_E=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006003/70060173.htm",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  remark =       "Check month: May or August??",
  xxtitle =      "The impact of object technology on commercial
                 transaction processing",
}

@Article{Steinbrunn:1997:HRO,
  author =       "Michael Steinbrunn and Guido Moerkotte and Alfons
                 Kemper",
  title =        "Heuristic and Randomized Optimization for the Join
                 Ordering Problem",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "3",
  pages =        "191--208",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:42 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemper:Alfons.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Moerkotte:Guido.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Steinbrunn:Michael.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006003/70060191.htm",
  abstract =     "Recent developments in database technology, such as
                 deductive database systems, have given rise to the
                 demand for new, cost-effective optimization techniques
                 for join expressions. In this paper many different
                 algorithms that compute approximate solutions for
                 optimizing join orders are studied, since traditional
                 dynamic programming techniques are not appropriate for
                 complex problems. Two possible solution spaces, the
                 space of left-deep and bushy processing trees, are
                 evaluated from a statistical point of view. The result
                 is that the common limitation to left-deep processing
                 trees is only advisable for certain join graph types.
                 Basically, optimizers from three classes are analysed:
                 heuristic, randomized and genetic algorithms. Each one
                 is extensively scrutinized with respect to its working
                 principle and its fitness for the desired application.
                 It turns out that randomized and genetic algorithms are
                 well suited for optimizing join expressions. They
                 generate solutions of high quality within a reasonable
                 running time. The benefits of heuristic optimizers,
                 namely the short running time, are often outweighed by
                 merely moderate optimization performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "genetic algorithms; heuristic algorithms; join
                 ordering; query optimization; randomized algorithms",
  remark =       "Check month: May or August??",
}
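
%%% The randomized optimizers studied above search the space of join
%%% orders by local moves.  A toy Python sketch of iterative
%%% improvement over left-deep orders with random swap moves and a
%%% made-up cost function (illustrative, not the paper's setup):
%%%
%%%     import random
%%%
%%%     def toy_cost(order, card):
%%%         # sum of intermediate result sizes, pretending each join
%%%         # shrinks its input tenfold
%%%         cost, size = 0, card[order[0]]
%%%         for rel in order[1:]:
%%%             size = size * card[rel] // 10
%%%             cost += size
%%%         return cost
%%%
%%%     def iterative_improvement(rels, card, moves=1000):
%%%         best = rels[:]
%%%         random.shuffle(best)
%%%         for _ in range(moves):
%%%             cand = best[:]
%%%             i, j = random.sample(range(len(cand)), 2)
%%%             cand[i], cand[j] = cand[j], cand[i]
%%%             if toy_cost(cand, card) < toy_cost(best, card):
%%%                 best = cand
%%%         return best
%%%
%%%     card = {"R": 1000, "S": 100, "T": 10, "U": 5000}
%%%     print(iterative_improvement(list(card), card))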

@Article{Panagos:1997:SRC,
  author =       "Euthimios Panagos and Alexandros Biliris",
  title =        "Synchronization and Recovery in a Client-Server
                 Storage System",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "3",
  pages =        "209--223",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:42 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Biliris:Alexandros.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Panagos:Euthimios.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006003/70060209.htm",
  abstract =     "Client-server object-oriented database management
                 systems differ significantly from traditional
                 centralized systems in terms of their architecture and
                 the applications they target. In this paper, we present
                 the client-server architecture of the EOS storage
                 manager and we describe the concurrency control and
                 recovery mechanisms it employs. EOS offers a
                 semi-optimistic locking scheme based on the
                 multi-granularity two-version two-phase locking
                 protocol. Under this scheme, multiple concurrent
                 readers are allowed to access a data item while it is
                 being updated by a single writer. Recovery is based on
                 write-ahead redo-only logging. Log records are
                 generated at the clients and they are shipped to the
                 server during normal execution and at transaction
                 commit. Transaction rollback is fast because there are
                 no updates that have to be undone, and recovery from
                 system crashes requires only one scan of the log for
                 installing the changes made by transactions that
                 committed before the crash. We also present a
                 preliminary performance evaluation of the
                 implementation of the above mechanisms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "checkpoint; client-server architecture; concurrency
                 control; locking; logging; object management; recovery;
                 transaction management",
  remark =       "Check month: May or August??",
}
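
%%% Under the two-version scheme described above, readers keep seeing
%%% the committed version while a single writer prepares a new one.
%%% A toy Python sketch of that rule for one data item (hypothetical
%%% API; the EOS protocol also involves multi-granularity locking):
%%%
%%%     class TwoVersionItem:
%%%         def __init__(self, value):
%%%             self.committed = value
%%%             self.uncommitted = None
%%%             self.writer = None
%%%
%%%         def read(self):
%%%             return self.committed   # readers never see dirty data
%%%
%%%         def write(self, txn, value):
%%%             if self.writer not in (None, txn):
%%%                 raise RuntimeError("only one writer at a time")
%%%             self.writer, self.uncommitted = txn, value
%%%
%%%         def commit(self, txn):
%%%             if self.writer == txn:
%%%                 self.committed = self.uncommitted
%%%                 self.uncommitted = self.writer = None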

@Article{Lomet:1997:CRI,
  author =       "David B. Lomet and Betty Salzberg",
  title =        "Concurrency and Recovery for Index Trees",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "3",
  pages =        "224--240",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:42 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lomet:David_B=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Salzberg:Betty.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006003/70060224.htm",
  abstract =     "Although many suggestions have been made for
                 concurrency in B$^+$-trees, few of these have
                 considered recovery as well. We describe an approach
                 which provides high concurrency while preserving
                 well-formed trees across system crashes. Our approach
                 works for a class of index trees that is a
                 generalization of the B$^{\rm link}$-tree. This class
                 includes some multi-attribute indexes and temporal
                 indexes. Structural changes in an index tree are
                 decomposed into a sequence of atomic actions, each one
                 leaving the tree well-formed and each working on a
                 separate level of the tree. All atomic actions on
                 levels of the tree above the leaf level are independent
                 of database transactions, and so are of short duration.
                 Incomplete structural changes are detected in normal
                 operations and trigger completion.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access methods; B-trees; concurrency; indexing;
                 recovery",
  remark =       "Check month: May or August??",
}
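
%%% The key idea above is decomposing a structural change into short
%%% atomic actions, each leaving the tree well-formed: a B-link-style
%%% half split makes the new sibling reachable through a side
%%% pointer, and posting the index term to the parent is a separate,
%%% later action.  A toy Python sketch (illustrative structure only):
%%%
%%%     class Node:
%%%         def __init__(self, keys):
%%%             self.keys, self.side = keys, None  # side = B-link pointer
%%%
%%%     def half_split(node):
%%%         # atomic action 1: the tree stays well-formed because the
%%%         # new sibling is still reachable via the side pointer
%%%         mid = len(node.keys) // 2
%%%         sib = Node(node.keys[mid:])
%%%         sib.side, node.keys, node.side = node.side, node.keys[:mid], sib
%%%         return sib
%%%
%%%     def post_index_term(parent, sep_key, sib):
%%%         # atomic action 2: short and independent of any database
%%%         # transaction; if missing after a crash, normal operations
%%%         # detect the incomplete split and complete it
%%%         parent.keys.append((sep_key, sib))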

@Article{Haas:1997:STA,
  author =       "Laura M. Haas and Michael J. Carey and Miron Livny and
                 Amit Shukla",
  title =        "Seeking the truth about {\em ad hoc\/} join costs",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "3",
  pages =        "241--256",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:42 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Carey:Michael_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Haas:Laura_M=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Livny:Miron.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shukla:Amit.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006003/70060241.htm",
  abstract =     "In this paper, we re-examine the results of prior work
                 on methods for computing {\em ad hoc\/} joins. We
                 develop a detailed cost model for predicting join
                 algorithm performance, and we use the model to develop
                 cost formulas for the major {\em ad hoc\/} join methods
                 found in the relational database literature. We show
                 that various pieces of ``common wisdom'' about join
                 algorithm performance fail to hold up when analyzed
                 carefully, and we use our detailed cost model to derive
                 optimal buffer allocation schemes for each of the join
                 methods examined here. We show that optimizing their
                 buffer allocations can lead to large performance
                 improvements, e.g., as much as a 400\% improvement in
                 some cases. We also validate our cost model's
                 predictions by measuring an actual implementation of
                 each join algorithm considered. The results of this
                 work should be directly useful to implementors of
                 relational query optimizers and query processing
                 systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "buffer allocation; cost models; join methods;
                 optimization; performance",
  remark =       "Check month: May or August??",
}
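
%%% The cost-model theme above can be illustrated with the textbook
%%% I/O formulas that such models refine; the buffer allocation B
%%% enters the cost directly.  A minimal Python sketch (standard
%%% formulas, not the paper's detailed model):
%%%
%%%     import math
%%%
%%%     def cost_block_nested_loop(R, S, B):
%%%         # scan R once; scan S once per (B-2)-page chunk of R
%%%         return R + math.ceil(R / (B - 2)) * S
%%%
%%%     def cost_grace_hash(R, S, B):
%%%         # partition both relations, then probe: roughly 3 passes,
%%%         # assuming B exceeds sqrt of the smaller relation
%%%         return 3 * (R + S)
%%%
%%%     for B in (10, 50, 300):   # pages of buffer
%%%         print(B, cost_block_nested_loop(1000, 5000, B),
%%%               cost_grace_hash(1000, 5000, B))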

@Article{Papazoglou:1997:EDM,
  author =       "Mike P. Papazoglou and Bernd J. Kr{\"a}mer",
  title =        "Erratum --- {A} database model for object dynamics",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "3",
  pages =        "257--260",
  month =        aug,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:42 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0006003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition. See \cite{Papazoglou:1997:DMO}.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kr=auml=mer:Bernd_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Papazoglou:Mike_P=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006003/70060257.htm",
  abstract =     "Due to a technical error, some figures of the above
                 paper were not reproduced satisfactorily. They are
                 printed again below.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  remark =       "Check month: May or August??",
}

@Article{Fahl:1997:QPO,
  author =       "Gustav Fahl and Tore Risch",
  title =        "Query Processing Over Object Views of Relational
                 Data",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "4",
  pages =        "261--281",
  month =        nov,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:44 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fahl:Gustav.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Risch:Tore.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006004/70060261.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006004/70060261.pdf",
  abstract =     "This paper presents an approach to {\em object view\/}
                 management for relational databases. Such a view
                 mechanism makes it possible for users to transparently
                  work with data in a relational database as if it were
                 stored in an object-oriented (OO) database. A query
                 against the object view is translated to one or several
                 queries against the relational database. The results of
                 these queries are then processed to form an answer to
                 the initial query. The approach is not restricted to a
                 `pure' object view mechanism for the relational data,
                 since the object view can also store its own data and
                 methods. Therefore it must be possible to process
                 queries that combine local data residing in the object
                 view with data retrieved from the relational database.
                 We discuss the key issues when object views of
                 relational databases are developed, namely: how to map
                  relational structures to subtype/supertype hierarchies
                 in the view, how to represent relational database
                 access in OO query plans, how to provide the concept of
                 object identity in the view, how to handle the fact
                 that the extension of types in the view depends on the
                 state of the relational database, and how to process
                 and optimize queries against the object view. The
                 results are based on experiences from a running
                 prototype implementation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "object views; object-oriented federated databases;
                 query optimization; query processing; relational
                 databases",
}
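
%%% A minimal Python sketch of the object-view idea above, with
%%% invented names (VIEW, EMP, translate_selection): a view type is
%%% backed by a relational table, object identity derives from the
%%% table key, and a selection on the view is translated into a
%%% query against the relational database. This illustrates the
%%% general approach, not the paper's actual design.

VIEW = {
    "Employee": {"table": "EMP", "key": "EMPNO",
                 "attrs": {"name": "ENAME", "salary": "SAL"}},
}

def translate_selection(view_type, attr, op, value):
    # Rewrite a selection on the view into SQL over the base table.
    v = VIEW[view_type]
    col = v["attrs"][attr]
    return "SELECT {key}, {col} FROM {tab} WHERE {col} {op} {val}".format(
        key=v["key"], col=col, tab=v["table"], op=op, val=value)

def oid(view_type, key_value):
    # Object identity in the view: (type, key of the underlying row).
    return (view_type, key_value)

if __name__ == "__main__":
    print(translate_selection("Employee", "salary", ">", 3000))
    print(oid("Employee", 7839))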

@Article{Diaz:1997:EEA,
  author =       "Oscar D{\'\i}az and Arturo Jaime",
  title =        "{EXACT}: An Extensible Approach to Active
                 Object-Oriented Databases",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "4",
  pages =        "282--295",
  month =        nov,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:44 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/D=iacute=az:Oscar.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jaime:Arturo.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006004/70060282.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006004/70060282.pdf",
  abstract =     "Active database management systems (DBMSs) are a
                 fast-growing area of research, mainly due to the large
                 number of applications which can benefit from this
                 active dimension. These applications are far from being
                 homogeneous, requiring different kinds of
                 functionalities. However, most of the active DBMSs
                 described in the literature only provide a {\em fixed,
                 hard-wired\/} execution model to support the active
                 dimension. In object-oriented DBMSs,
                 event-condition-action rules have been proposed for
                 providing active behaviour. This paper presents EXACT,
                 a rule manager for object-oriented DBMSs which provides
                 a variety of options from which the designer can choose
                 the one that best fits the semantics of the concept to
                 be supported by rules. Due to the difficulty of
                 foreseeing future requirements, special attention has
                 been paid to making rule management easily extensible,
                 so that the user can tailor it to suit specific
                 applications. This has been borne out by an
                 implementation in ADAM, an object-oriented DBMS. An
                 example is shown of how the default mechanism can be
                 easily extended to support new requirements.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "active DBMS; extensibility; metaclasses;
                 object-oriented DBMS",
}
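
%%% A minimal event-condition-action sketch in Python to illustrate
%%% the rule mechanism the abstract refers to; the interface below
%%% (RuleManager, define_rule, raise_event) is invented for
%%% illustration and is not EXACT's API.

class RuleManager:
    def __init__(self):
        self.rules = {}  # event name -> list of (condition, action)

    def define_rule(self, event, condition, action):
        self.rules.setdefault(event, []).append((condition, action))

    def raise_event(self, event, params):
        # Evaluate each registered rule's condition; run the action
        # of every rule whose condition holds.
        for condition, action in self.rules.get(event, []):
            if condition(params):
                action(params)

if __name__ == "__main__":
    rm = RuleManager()
    rm.define_rule("salary_updated",
                   lambda p: p["new"] > 2 * p["old"],
                   lambda p: print("audit:", p))
    rm.raise_event("salary_updated", {"old": 1000, "new": 2500})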

@Article{Bohm:1997:SDS,
  author =       "Klemens B{\"o}hm and Karl Aberer and Erich J. Neuhold
                 and Xiaoya Yang",
  title =        "Structured Document Storage and Refined Declarative
                 and Navigational Access Mechanisms in {HyperStorM}",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "4",
  pages =        "296--311",
  month =        nov,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:44 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Aberer:Karl.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/B=ouml=hm:Klemens.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Neuhold:Erich_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yang:Xiaoya.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006004/70060296.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006004/70060296.pdf",
  abstract =     "The combination of SGML and database technology allows
                 to refine both declarative and navigational access
                 mechanisms for structured document collection: with
                 regard to declarative access, the user can formulate
                 complex information needs without knowing a query
                 language, the respective document type definition (DTD)
                 or the underlying modelling. Navigational access is
                 eased by hyperlink-rendition mechanisms going beyond
                 plain link-integrity checking. With our approach, the
                 database-internal representation of documents is
                 configurable. It allows for an efficient implementation
                 of operations, because DTD knowledge is not needed for
                 document structure recognition. We show how the number
                 of method invocations and the cost of parsing can be
                 significantly reduced.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "document query languages; navigation; OODBMSs; SGML",
}

@Article{Muck:1997:CTH,
  author =       "Thomas A. M{\"u}ck and Martin L. Polaschek",
  title =        "A Configurable Type Hierarchy Index for {OODB}",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "4",
  pages =        "312--332",
  month =        nov,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:44 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                  http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm;
                  http://portal.acm.org/;
                  https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/M=uuml=ck:Thomas_A=.html;
                  http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Polaschek:Martin_L=.html;
                  http://link.springer.de/link/service/journals/00778/bibs/7006004/70060312.htm;
                  http://link.springer.de/link/service/journals/00778/papers/7006004/70060312.pdf",
  abstract =     "With respect to the specific requirements of advanced
                  OODB applications, index data structures for type
                  hierarchies in OODBMS have to provide efficient support
                  for multiattribute queries and have to allow index
                  optimization for a particular query profile. We
                  describe the {\em multikey type index\/} and an
                  efficient implementation of this indexing scheme. It
                  meets both requirements: in addition to its
                  multiattribute query capabilities it is designed as a
                  mediator between two standard design alternatives,
                  key-grouping and type-grouping. A prerequisite for the
                  multikey type index is a linearization algorithm which
                  maps type hierarchies to linearly ordered attribute
                  domains in such a way that each subhierarchy is
                  represented by an interval of this domain. The
                  algorithm extends previous results with respect to
                  multiple inheritance. The subsequent evaluation of our
                  proposal focuses on storage space overhead as well as
                  on the number of disk I/O operations needed for query
                  execution. The analytical results for the multikey type
                  index are compared to previously published figures for
                  well-known single-key search structures. The comparison
                  clearly shows the superiority of the multikey type
                  index for a large class of query profiles.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access methods; indexing; multiple inheritance; OODB;
                  type hierarchies",
}
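
%%% A small Python sketch of the interval property established by
%%% the linearization algorithm mentioned above, for the simple
%%% case of single inheritance (the paper also handles multiple
%%% inheritance): number the types in preorder so that every
%%% subhierarchy occupies a contiguous interval, which an index can
%%% then answer with a range scan.

def linearize(hierarchy, root, counter=None, intervals=None):
    # hierarchy maps a type name to the list of its direct subtypes.
    if counter is None:
        counter, intervals = [0], {}
    low = counter[0]
    counter[0] += 1
    for child in hierarchy.get(root, []):
        linearize(hierarchy, child, counter, intervals)
    intervals[root] = (low, counter[0] - 1)
    return intervals

if __name__ == "__main__":
    h = {"Object": ["Shape", "Text"], "Shape": ["Circle", "Polygon"]}
    print(linearize(h, "Object"))
    # A query over all Shapes becomes a scan of Shape's interval.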

@Article{Berchtold:1997:UEF,
  author =       "Stefan Berchtold and Daniel A. Keim and Hans-Peter
                 Kriegel",
  title =        "Using Extended Feature Objects for Partial Similarity
                 Retrieval",
  journal =      j-VLDB-J,
  volume =       "6",
  number =       "4",
  pages =        "333--348",
  month =        nov,
  year =         "1997",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:44 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb6.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t7006004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Berchtold:Stefan.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Keim:Daniel_A=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kriegel:Hans=Peter.html;
                 http://link.springer.de/link/service/journals/00778/bibs/7006004/70060333.htm;
                 http://link.springer.de/link/service/journals/00778/papers/7006004/70060333.pdf",
  abstract =     "In this paper, we introduce the concept of extended
                 feature objects for similarity retrieval. Conventional
                 approaches for similarity search in databases map each
                 object in the database to a point in some
                 high-dimensional feature space and define similarity as
                 some distance measure in this space. For many
                 similarity search problems, this feature-based approach
                 is not sufficient. When retrieving partially similar
                 polygons, for example, the search cannot be restricted
                 to edge sequences, since similar polygon sections may
                 start and end anywhere on the edges of the polygons. In
                 general, inherently continuous problems such as the
                 partial similarity search cannot be solved by using
                 point objects in feature space. In our solution, we
                 therefore introduce extended feature objects consisting
                 of an infinite set of feature points. For an efficient
                 storage and retrieval of the extended feature objects,
                 we determine the minimal bounding boxes of the feature
                 objects in multidimensional space and store these boxes
                 using a spatial access structure. In our concrete
                 polygon problem, sets of polygon sections are mapped to
                 2D feature objects in high-dimensional space which are
                 then approximated by minimal bounding boxes and stored
                 in an R$^*$-tree. The selectivity of the index is
                 improved by using an adaptive decomposition of very
                 large feature objects and a dynamic joining of small
                 feature objects. For the polygon problem, translation,
                 rotation, and scaling invariance is achieved by using
                 the Fourier-transformed curvature of the normalized
                 polygon sections. In contrast to vertex-based
                  algorithms, our algorithm guarantees that no false
                  dismissals occur and additionally provides fast
                 search times for realistic database sizes. We evaluate
                 our method using real polygon data of a supplier for
                 the car manufacturing industry.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "CAD databases; Fourier transformation; indexing and
                 query processing of spatial objects; partial similarity
                 retrieval",
}
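
%%% A tiny Python sketch of one step from the abstract above:
%%% approximating an extended feature object (here, a finite sample
%%% of its feature points) by a minimal bounding box, which is what
%%% would be stored in a spatial access structure such as an
%%% R*-tree. The representation is an illustrative assumption.

def minimal_bounding_box(points):
    # points: non-empty list of d-dimensional tuples.
    dims = range(len(points[0]))
    lower = tuple(min(p[i] for p in points) for i in dims)
    upper = tuple(max(p[i] for p in points) for i in dims)
    return lower, upper

if __name__ == "__main__":
    print(minimal_bounding_box([(1, 4), (2, 2), (0, 3)]))
    # prints ((0, 2), (2, 4))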

@Article{Han:1998:ORQ,
  author =       "Jia Liang Han",
  title =        "Optimizing Relational Queries in Connection
                 Hypergraphs: Nested Queries, Views, and Binding
                 Propagations",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "1",
  pages =        "1--11",
  month =        feb,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:45 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Han:Jia_Liang.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007001/80070001.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007001/80070001.pdf",
  abstract =     "We optimize relational queries using connection
                 hypergraphs (CHGs). All operations including
                 value-passing between SQL blocks can be set-oriented.
                  By introducing partial evaluations, we can reorder
                  operations for nested queries. For a
                 query using views, we merge CHGs for the views and the
                 query into one CHG and then apply query optimization.
                 Furthermore, we may simulate magic sets methods
                 elegantly in a CHG. Sideways information-passing
                 strategies (SIPS) in a CHG amount to partial
                 evaluations of SIPS paths. We introduce the maximum
                 SIPS strategy, which performs SIPS for all bindings and
                 all SIPS paths for a query. The new method has several
                 advantages. First, the maximum SIPS strategy can be
                 more efficient than the previous SIPS based on simple
                 heuristics. Second, it is conceptually simple and easy
                 to implement. Third, the processing strategies may be
                 incorporated with the search space for query execution
                 plans, which is a proven optimization strategy
                 introduced by System R. Fourth, it provides a general
                  framework for query optimization and may potentially be
                 used to optimize next-generation database systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "connection hypergraphs; partial evaluations;
                 relational query optimization; search space; SIPS",
}

@Article{Hanson:1998:FRC,
  author =       "Eric N. Hanson and I.-Cheng Chen and Roxana Dastur and
                 Kurt Engel and Vijay Ramaswamy and Wendy Tan and Chun
                 Xu",
  title =        "A Flexible and Recoverable Client\slash Server
                 Database Event Notification System",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "1",
  pages =        "12--24",
  month =        feb,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:45 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chen:I==Cheng.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dastur:Roxana.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Engel:Kurt.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hanson:Eric_N=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ramaswamy:Vijay.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tan:Wendy.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/x/Xu:Chun.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007001/80070012.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007001/80070012.pdf",
  abstract =     "A software architecture is presented that allows
                 client application programs to interact with a DBMS
                 server in a flexible and powerful way, using either
                 direct, volatile messages, or messages sent via
                 recoverable queues. Normal requests from clients to the
                 server and replies from the server to clients can be
                 transmitted using direct or recoverable messages. In
                 addition, an application event notification mechanism
                 is provided, whereby client applications running
                 anywhere on the network can register for events, and
                 when those events are raised, the clients are notified.
                 A novel parameter passing mechanism allows a set of
                 tuples to be included in an event notification. The
                 event mechanism is particularly useful in an active
                 DBMS, where events can be raised by triggers to signal
                 running application programs.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mehta:1998:OPM,
  author =       "Ashish Mehta and James Geller and Yehoshua Perl and
                 Erich J. Neuhold",
  title =        "The {OODB} Path-Method Generator ({PMG}) Using Access
                 Weights and Precomputed Access Relevance",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "1",
  pages =        "25--47",
  month =        feb,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:45 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Geller:James.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mehta:Ashish.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Neuhold:Erich_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Perl:Yehoshua.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007001/80070025.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007001/80070025.pdf",
  abstract =     "A {\em path-method\/} is used as a mechanism in
                 object-oriented databases (OODBs) to retrieve or to
                 update information relevant to one class that is not
                 stored with that class but with some other class. A
                 path-method is a method which traverses from one class
                 through a chain of connections between classes and
                 accesses information at another class. However, it is a
                 difficult task for a casual user or even an application
                 programmer to write path-methods to facilitate queries.
                 This is because it might require comprehensive
                 knowledge of many classes of the conceptual schema that
                 are not directly involved in the query, and therefore
                 may not even be included in a user's (incomplete) view
                 about the contents of the database. We have developed a
                 system, called {\em path-method generator\/} (PMG),
                 which generates path-methods automatically according to
                 a user's database-manipulating requests. The PMG offers
                 the user one of the possible path-methods and the user
                 verifies from his knowledge of the intended purpose of
                 the request whether that path-method is the desired
                  one. If the path-method is rejected, then the user can
                 utilize his now increased knowledge about the database
                 to request (with additional parameters given) another
                 offer from the PMG. The PMG is based on {\em access
                 weights\/} attached to the connections between classes
                 and precomputed {\em access relevance\/} between every
                 pair of classes of the OODB. Specific rules for access
                 weight assignment and algorithms for computing access
                 relevance appeared in our previous papers [MGPF92,
                 MGPF93, MGPF96]. In this paper, we present a variety of
                 traversal algorithms based on access weights and
                 precomputed access relevance. Experiments identify some
                 of these algorithms as very successful in generating
                 most desired path-methods. The PMG system utilizes
                 these successful algorithms and is thus an efficient
                 tool for aiding the user with the difficult task of
                 querying and updating a large OODB.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access relevance; access weight; object-oriented
                 databases; OODB queries; path-method; traversal
                 algorithms",
}
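
%%% A hedged Python sketch of precomputing pairwise access
%%% relevance: purely for illustration it assumes that the
%%% relevance of a path is the product of the access weights (each
%%% in [0,1]) on its connections, and that the access relevance of
%%% a pair of classes is the maximum over all connecting paths,
%%% computed Floyd--Warshall style. The paper's actual definitions
%%% and algorithms appear in the cited MGPF papers.

def access_relevance(classes, weight):
    # weight[(a, b)] is the access weight of the connection a -> b.
    rel = {(a, b): 0.0 for a in classes for b in classes}
    for a in classes:
        rel[(a, a)] = 1.0
    for (a, b), w in weight.items():
        rel[(a, b)] = max(rel[(a, b)], w)
    for k in classes:
        for a in classes:
            for b in classes:
                via_k = rel[(a, k)] * rel[(k, b)]
                if via_k > rel[(a, b)]:
                    rel[(a, b)] = via_k
    return rel

if __name__ == "__main__":
    w = {("Course", "Dept"): 0.9, ("Dept", "Chair"): 0.8}
    rel = access_relevance(["Course", "Dept", "Chair"], w)
    print(rel[("Course", "Chair")])  # ~0.72, via the Dept connection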

@Article{Scheuermann:1998:DPL,
  author =       "Peter Scheuermann and Gerhard Weikum and Peter
                 Zabback",
  title =        "Data Partitioning and Load Balancing in Parallel Disk
                 Systems",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "1",
  pages =        "48--66",
  month =        feb,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:45 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Scheuermann:Peter.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Weikum:Gerhard.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zabback:Peter.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007001/80070048.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007001/80070048.pdf",
  abstract =     "Parallel disk systems provide opportunities for
                 exploiting I/O parallelism in two possible ways, namely
                 via inter-request and intra-request parallelism. In
                 this paper, we discuss the main issues in performance
                 tuning of such systems, namely striping and load
                 balancing, and show their relationship to response time
                 and throughput. We outline the main components of an
                 intelligent, self-reliant file system that aims to
                 optimize striping by taking into account the
                 requirements of the applications, and performs load
                 balancing by judicious file allocation and dynamic
                 redistributions of the data when access patterns
                 change. Our system uses simple but effective heuristics
                  that incur little overhead. We present performance
                 experiments based on synthetic workloads and real-life
                 traces.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data allocation; disk cooling; file striping; load
                 balancing; parallel disk systems; performance tuning",
}
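
%%% A greedy "disk cooling" step sketched in Python under the
%%% assumption that each block has a measured heat (access rate);
%%% this conveys the flavor of such heuristics, not the paper's
%%% exact policy: migrate the hottest block of the most loaded disk
%%% to the least loaded disk whenever that strictly lowers the peak
%%% load.

def cooling_step(disks):
    # disks: list of dicts mapping block id -> heat (access rate).
    loads = [sum(d.values()) for d in disks]
    hot = loads.index(max(loads))
    cool = loads.index(min(loads))
    block = max(disks[hot], key=disks[hot].get)
    heat = disks[hot][block]
    if loads[cool] + heat < loads[hot]:  # strictly reduces peak load
        disks[cool][block] = disks[hot].pop(block)
        return True
    return False

if __name__ == "__main__":
    disks = [{"a": 5.0, "b": 4.0}, {"c": 1.0}]
    while cooling_step(disks):
        pass
    print(disks)  # block "a" has migrated to the second disk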

@Article{Ishakbeyoglu:1998:MII,
  author =       "Naci S. Ishakbeyo{\u{g}}lu and Z. Meral
                 {\"O}zsoyo{\u{g}}lu",
  title =        "Maintenance of Implication Integrity Constraints Under
                 Updates to Constraints",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "2",
  pages =        "67--78",
  month =        may,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:45 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zsoyoglu:Z=_Meral.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/i/Ishakbeyoglu:Naci.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007002/80070067.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007002/80070067.pdf",
  abstract =     "Semantic integrity constraints are used for enforcing
                 the integrity of the database as well as for improving
                 the efficiency of the database utilization. Although
                  semantic integrity constraints are usually much more
                  static than the data itself, changes in the data
                  semantics may necessitate corresponding changes in
                  the constraint base. In this paper we address the
                  problems of maintaining a consistent and
                 non-redundant set of constraints satisfied by the
                 database in the case of updates to the constraint base.
                 We consider implication constraints as semantic
                 integrity constraints. The constraints are represented
                 as conjunctions of inequalities. We present a
                 methodology to determine whether a constraint is
                 redundant or contradictory with respect to a set of
                 constraints. The methodology is based on the
                 partitioning of the constraint base which improves the
                 efficiency of algorithms that check whether a
                 constraint is redundant or contradictory with respect
                 to a constraint base.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "implication integrity constraints; integrity
                 constraints; partitioning; redundancy; satisfiability",
}
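
%%% A toy Python sketch of the redundancy/contradiction test for
%%% one special case (single-attribute bounds); the paper treats
%%% general conjunctions of inequalities and uses partitioning of
%%% the constraint base to keep the test cheap. The representation
%%% below is an assumption made for illustration.

def classify_upper_bound(base, attr, c):
    # base: list of (attr, op, const) with op in {"<=", ">="}.
    # Classify the new constraint "attr <= c" against the base.
    for a, op, k in base:
        if a != attr:
            continue
        if op == "<=" and k <= c:
            return "redundant"       # an equal or tighter bound exists
        if op == ">=" and k > c:
            return "contradictory"   # inconsistent with a lower bound
    return "new"

if __name__ == "__main__":
    base = [("salary", "<=", 100), ("age", ">=", 18)]
    print(classify_upper_bound(base, "salary", 120))  # redundant
    print(classify_upper_bound(base, "age", 10))      # contradictory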

@Article{Dessloch:1998:ADP,
  author =       "Stefan De{\ss}loch and Theo H{\"a}rder and Nelson
                 Mendon{\c{c}}a Mattos and Bernhard Mitschang and
                 Joachim Thomas",
  title =        "Advanced Data Processing in {KRISYS}: Modeling
                 Concepts, Implementation Techniques, and Client\slash
                 Server Issues",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "2",
  pages =        "79--95",
  month =        may,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:45 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/De=szlig=loch:Stefan.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/H=auml=rder:Theo.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mattos:Nelson_Mendon=ccedil=a.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mitschang:Bernhard.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Thomas:Joachim.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007002/80070079.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007002/80070079.pdf",
  abstract =     "The increasing power of modern computers is steadily
                 opening up new application domains for advanced data
                 processing such as engineering and knowledge-based
                 applications. To meet their requirements, concepts for
                 advanced data management have been investigated during
                 the last decade, especially in the field of object
                 orientation. Over the last couple of years, the
                 database group at the University of Kaiserslautern has
                 been developing such an advanced database system, the
                 KRISYS prototype. In this article, we report on the
                 results and experiences obtained in the course of this
                 project. The primary objective for the first version of
                 KRISYS was to provide semantic features, such as an
                  expressive data model, a set-oriented query language,
                  and deductive as well as active capabilities. The first
                 KRISYS prototype became completely operational in 1989.
                 To evaluate its features and to stabilize its
                 functionality, we started to develop several
                 applications with the system. These experiences marked
                 the starting point for an overall redesign of KRISYS.
                 Major goals were to tune KRISYS and its
                 query-processing facilities to a suitable client/server
                 environment, as well as to provide elaborate mechanisms
                 for consistency control comprising semantic integrity
                 constraints, multi-user synchronization, and failure
                 recovery. The essential aspects of the resulting
                 client/server architecture are embodied by the
                 client-side data management needed to effectively
                 support advanced applications and to gain the required
                 system performance for interactive work. The project
                 stages of KRISYS properly reflect the essential
                 developments that have taken place in the research on
                  advanced database systems over recent years. Hence,
                 the subsequent discussions will bring up a number of
                 important aspects with regard to advanced data
                 processing that are of significant general importance,
                 as well as of general applicability to database
                 systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "client\slash server architectures; Consistency
                 control; object-oriented modeling concepts; query
                 processing; run-time optimization",
}

@Article{Abiteboul:1998:LVS,
  author =       "Serge Abiteboul and Sophie Cluet and Tova Milo",
  title =        "A Logical View of Structured Files",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "2",
  pages =        "96--114",
  month =        may,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:45 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Abiteboul:Serge.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cluet:Sophie.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Milo:Tova.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007002/80070096.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007002/80070096.pdf",
  abstract =     "Structured data stored in files can benefit from
                 standard database technology. In particular, we show
                 here how such data can be queried and updated using
                 declarative database languages. We introduce the notion
                 of {\em structuring schema}, which consists of a
                 grammar annotated with database programs. Based on a
                 structuring schema, a file can be viewed as a database
                 structure, queried and updated as such. For {\em
                 queries}, we show that almost standard database
                 optimization techniques can be used to answer queries
                 without having to construct the entire database. For
                 {\em updates}, we study in depth the propagation to the
                 file of an update specified on the database view of
                  this file. The problem is infeasible in general, and
                  we present a number of negative results. The positive
                  results consist of techniques that allow updates to be
                  propagated efficiently under some reasonable {\em
                 locality\/} conditions on the structuring schemas.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database; file system; query; query and update
                 optimization; textual data; update",
}
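
%%% A toy Python sketch of the idea of querying a structured file
%%% as a database: here a per-line pattern plays the role of the
%%% grammar, and the attached code builds tuples from each match,
%%% so a selection can be answered without materializing the whole
%%% database. The structuring-schema formalism of the paper is far
%%% more general than this.

import re

LINE = re.compile(r"(?P<name>\w+):(?P<phone>[\d-]+)")

def file_as_relation(lines):
    # Yield (name, phone) tuples lazily instead of building tables.
    for line in lines:
        m = LINE.match(line)
        if m:
            yield (m.group("name"), m.group("phone"))

if __name__ == "__main__":
    data = ["alice:555-1212", "bob:555-9999"]
    print([t for t in file_as_relation(data) if t[0] == "bob"])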

@Article{Ooi:1998:FIR,
  author =       "Beng Chin Ooi and Kian-Lee Tan and Tat Seng Chua and
                 Wynne Hsu",
  title =        "Fast Image Retrieval Using Color-Spatial Information",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "2",
  pages =        "115--128",
  month =        may,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:45 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chua:Tat=Seng.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Hsu:Wynne.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Ooi:Beng_Chin.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tan:Kian=Lee.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007002/80070115.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007002/80070115.pdf",
  abstract =     "In this paper, we present an image retrieval system
                 that employs both the color and spatial information of
                 images to facilitate the retrieval process. The basic
                 unit used in our technique is a {\em single-colored
                 cluster}, which bounds a homogeneous region of that
                 color in an image. Two clusters from two images are
                 similar if they are of the same color and overlap in
                 the image space. The number of clusters that can be
                 extracted from an image can be very large, and it
                 affects the accuracy of retrieval. We study the effect
                 of the number of clusters on retrieval effectiveness to
                 determine an appropriate value for ``optimal''
                 performance. To facilitate efficient retrieval, we also
                 propose a multi-tier indexing mechanism called the {\em
                 Sequenced Multi-Attribute Tree\/} (SMAT). We
                 implemented a two-tier SMAT, where the first layer is
                 used to prune away clusters that are of different
                 colors, while the second layer discriminates clusters
                 of different spatial locality. We conducted an
                 experimental study on an image database consisting of
                 12,000 images. Our results show the effectiveness of
                 the proposed color-spatial approach, and the efficiency
                 of the proposed indexing mechanism.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "color-spatial information; content-based retrieval;
                 sequenced multi-attribute tree; single-colored
                 cluster",
}
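
%%% A minimal Python sketch of the cluster-matching predicate from
%%% the abstract above: two single-colored clusters are similar if
%%% they have the same color and overlap in image space. The
%%% bounding-rectangle representation is an assumption made for
%%% illustration.

def rects_overlap(r1, r2):
    # Each rectangle is (x1, y1, x2, y2) with x1 <= x2 and y1 <= y2.
    return not (r1[2] < r2[0] or r2[2] < r1[0] or
                r1[3] < r2[1] or r2[3] < r1[1])

def clusters_similar(c1, c2):
    # A cluster is a (color, bounding_rect) pair.
    return c1[0] == c2[0] and rects_overlap(c1[1], c2[1])

if __name__ == "__main__":
    a = ("red", (0, 0, 10, 10))
    b = ("red", (5, 5, 20, 20))
    print(clusters_similar(a, b))  # True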

@Article{Jarke:1998:GE,
  author =       "Matthias Jarke",
  title =        "Guest {Editorial}",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "3",
  pages =        "129--129",
  month =        aug,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:47 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/8007003/80070129.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007003/80070129.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Seshadri:1998:EAD,
  author =       "Praveen Seshadri",
  title =        "Enhanced Abstract Data Types in Object-Relational
                 Databases",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "3",
  pages =        "130--140",
  month =        aug,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:47 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Special Issue on {VLDB 1997}. Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Seshadri:Praveen.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007003/80070130.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007003/80070130.pdf",
  abstract =     "The explosion in complex multimedia content makes it
                 crucial for database systems to support such data
                 efficiently. This paper argues that the ``blackbox''
                 ADTs used in current object-relational systems inhibit
                 their performance, thereby limiting their use in
                 emerging applications. Instead, the next generation of
                 object-relational database systems should be based on
                 enhanced abstract data type (E-ADT) technology. An
                  E-ADT can expose the {\em semantics\/} of its methods
                 to the database system, thereby permitting advanced
                 query optimizations. Fundamental architectural changes
                 are required to build a database system with E-ADTs;
                 the added functionality should not compromise the
                 modularity of data types and the extensibility of the
                 type system. The implementation issues have been
                 explored through the development of E-ADTs in {\em
                  Predator}. Initial performance results demonstrate an
                  order-of-magnitude performance improvement.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database types; extensibility; object-relational
                 database; query optimization",
}

@Article{Kraiss:1998:IDC,
  author =       "Achim Kraiss and Gerhard Weikum",
  title =        "Integrated Document Caching and Prefetching in Storage
                 Hierarchies Based on {Markov}-Chain Predictions",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "3",
  pages =        "141--162",
  month =        aug,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:47 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kraiss:Achim.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Weikum:Gerhard.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007003/80070141.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007003/80070141.pdf",
  abstract =     "Large multimedia document archives may hold a major
                 fraction of their data in tertiary storage libraries
                 for cost reasons. This paper develops an integrated
                 approach to the vertical data migration between the
                 tertiary, secondary, and primary storage in that it
                 reconciles speculative prefetching, to mask the high
                 latency of the tertiary storage, with the replacement
                 policy of the document caches at the secondary and
                 primary storage level, and also considers the
                 interaction of these policies with the tertiary and
                 secondary storage request scheduling. The integrated
                 migration policy is based on a continuous-time Markov
                 chain model for predicting the expected number of
                 accesses to a document within a specified time horizon.
                 Prefetching is initiated only if that expectation is
                 higher than those of the documents that need to be
                 dropped from secondary storage to free up the necessary
                 space. In addition, the possible resource contention at
                 the tertiary and secondary storage is taken into
                 account by dynamically assessing the response-time
                 benefit of prefetching a document versus the penalty
                 that it would incur on the response time of the pending
                 document requests. The parameters of the
                 continuous-time Markov chain model, the probabilities
                 of co-accessing certain documents and the interaction
                 times between successive accesses, are dynamically
                 estimated and adjusted to evolving workload patterns by
                 keeping online statistics. The integrated policy for
                 vertical data migration has been implemented in a
                 prototype system. The system makes profitable use of
                 the Markov chain model also for the scheduling of
                 volume exchanges in the tertiary storage library.
                 Detailed simulation experiments with Web-server-like
                 synthetic workloads indicate significant gains in terms
                 of client response time. The experiments also show that
                 the overhead of the statistical bookkeeping and the
                 computations for the access predictions is
                 affordable.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "caching; Markov chains; performance; prefetching;
                 scheduling; stochastic modeling; tertiary storage",
}
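
%%% A simplified, discrete-time Python sketch of the prediction
%%% step (the paper uses a continuous-time Markov chain): the
%%% expected number of accesses to each document within a horizon
%%% of n steps is the sum of the access distributions after 1..n
%%% transitions; the documents with the highest expectation are the
%%% prefetch candidates.

def expected_accesses(transition, start, steps):
    # transition[i][j]: probability that the access after document
    # i hits document j; start: current distribution over documents.
    n = len(start)
    dist = list(start)
    expected = [0.0] * n
    for _ in range(steps):
        dist = [sum(dist[i] * transition[i][j] for i in range(n))
                for j in range(n)]
        for j in range(n):
            expected[j] += dist[j]
    return expected

if __name__ == "__main__":
    p = [[0.1, 0.9], [0.5, 0.5]]
    print(expected_accesses(p, [1.0, 0.0], 3))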

@Article{Chakrabarti:1998:SFS,
  author =       "Soumen Chakrabarti and Byron Dom and Rakesh Agrawal
                 and Prabhakar Raghavan",
  title =        "Scalable Feature Selection, Classification and
                 Signature Generation for Organizing Large Text
                 Databases into Hierarchical Topic Taxonomies",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "3",
  pages =        "163--178",
  month =        aug,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:47 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Agrawal:Rakesh.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chakrabarti:Soumen.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Dom:Byron.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Raghavan:Prabhakar.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007003/80070163.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007003/80070163.pdf",
  abstract =     "We explore how to organize large text databases
                 hierarchically by topic to aid better searching,
                 browsing and filtering. Many corpora, such as Internet
                 directories, digital libraries, and patent databases
                 are manually organized into topic hierarchies, also
                 called {\em taxonomies}. Similar to indices for
                 relational data, taxonomies make search and access more
                 efficient. However, the exponential growth in the
                 volume of on-line textual information makes it nearly
                 impossible to maintain such taxonomic organization for
                 large, fast-changing corpora by hand. We describe an
                 automatic system that starts with a small sample of the
                 corpus in which topics have been assigned by hand, and
                 then updates the database with new documents as the
                 corpus grows, assigning topics to these new documents
                 with high speed and accuracy. To do this, we use
                 techniques from statistical pattern recognition to
                 efficiently separate the {\em feature\/} words, or {\em
                 discriminants}, from the {\em noise\/} words at each
                 node of the taxonomy. Using these, we build a
                 multilevel classifier. At each node, this classifier
                 can ignore the large number of ``noise'' words in a
                 document. Thus, the classifier has a small model size
                 and is very fast. Owing to the use of context-sensitive
                 features, the classifier is very accurate. As a
                 by-product, we can compute for each document a set of
                 terms that occur significantly more often in it than in
                 the classes to which it belongs. We describe the design
                 and implementation of our system, stressing how to
                 exploit standard, efficient relational operations like
                 sorts and joins. We report on experiences with the
                 Reuters newswire benchmark, the US patent database, and
                 web document samples from Yahoo!. We discuss
                 applications where our system can improve searching and
                 filtering capabilities.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
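
%%% A toy Python sketch in the spirit of the feature-selection step
%%% described above: rank terms by a Fisher-style discrimination
%%% score (between-class over within-class variation of the term's
%%% frequency) and keep the top k as feature words at a node; the
%%% remaining terms are treated as noise there. The scoring details
%%% below are illustrative assumptions.

def fisher_score(freq_by_class):
    # freq_by_class: for each class, the term's per-document
    # relative frequencies.
    means = [sum(f) / len(f) for f in freq_by_class]
    grand = sum(means) / len(means)
    between = sum((m - grand) ** 2 for m in means)
    within = sum(sum((x - m) ** 2 for x in f) / len(f)
                 for f, m in zip(freq_by_class, means))
    return between / within if within else float("inf")

def select_features(term_stats, k):
    ranked = sorted(term_stats,
                    key=lambda t: fisher_score(term_stats[t]),
                    reverse=True)
    return ranked[:k]

if __name__ == "__main__":
    stats = {"car": [[0.9, 0.8], [0.1, 0.0]],
             "the": [[0.9, 1.0], [1.0, 0.9]]}
    print(select_features(stats, 1))  # ['car']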

@Article{Roy:1998:GCO,
  author =       "Prasan Roy and S. Seshadri and Abraham Silberschatz
                 and S. Sudarshan and S. Ashwin",
  title =        "Garbage Collection in Object-Oriented Databases Using
                 Transactional Cyclic Reference Counting",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "3",
  pages =        "179--193",
  month =        aug,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:47 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Ashwin:S=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Roy:Prasan.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Seshadri:S=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sudarshan:S=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007003/80070179.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007003/80070179.pdf",
  abstract =     "Garbage collection is important in object-oriented
                 databases to free the programmer from explicitly
                 deallocating memory. In this paper, we present a
                 garbage collection algorithm, called Transactional
                 Cyclic Reference Counting (TCRC), for object-oriented
                 databases. The algorithm is based on a variant of a
                 reference-counting algorithm proposed for functional
                 programming languages. The algorithm keeps track of
                 auxiliary reference count information to detect and
                 collect cyclic garbage. The algorithm works correctly
                 in the presence of concurrently running transactions,
                 and system failures. It does not obtain any long-term
                 locks, thereby minimizing interference with transaction
                 processing. It uses recovery subsystem logs to detect
                 pointer updates; thus, existing code need not be
                 rewritten. Finally, it exploits schema information, if
                 available, to reduce costs. We have implemented the
                 TCRC algorithm and present results of a performance
                 study of the implementation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
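
A toy illustration of the problem TCRC attacks: plain reference
counts can never reclaim cyclic garbage. The collector below falls
back on a reachability rescan from the persistent roots; the
transactional machinery that is the paper's actual contribution
(auxiliary counts, log-based pointer tracking, recovery) is omitted.

class ObjStore:
    def __init__(self):
        self.refs = {}      # oid -> set of oids it references
        self.rc = {}        # oid -> count of incoming references
        self.roots = set()  # persistent roots are never collected

    def new(self, oid, root=False):
        self.refs[oid] = set()
        self.rc[oid] = 0
        if root:
            self.roots.add(oid)

    def add_ref(self, src, dst):
        if dst not in self.refs[src]:
            self.refs[src].add(dst)
            self.rc[dst] += 1

    def del_ref(self, src, dst):
        # Cycles keep rc > 0 forever; only the rescan below finds them.
        if dst in self.refs[src]:
            self.refs[src].discard(dst)
            self.rc[dst] -= 1

    def collect(self):
        live, stack = set(), list(self.roots)
        while stack:
            o = stack.pop()
            if o not in live:
                live.add(o)
                stack.extend(self.refs[o])
        for o in [o for o in self.refs if o not in live]:
            del self.refs[o], self.rc[o]
        return live

store = ObjStore()
for oid in ("root", "a", "b"):
    store.new(oid, root=(oid == "root"))
store.add_ref("root", "a"); store.add_ref("a", "b"); store.add_ref("b", "a")
store.del_ref("root", "a")      # a<->b is now an unreachable cycle
print(sorted(store.collect()))  # -> ['root']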

@Article{Ng:1998:IRM,
  author =       "Wee Teck Ng and Peter M. Chen",
  title =        "Integrating Reliable Memory in Databases",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "3",
  pages =        "194--204",
  month =        aug,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:47 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chen:Peter_M=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Ng:Wee_Teck.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007003/80070194.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007003/80070194.pdf",
  abstract =     "Recent results in the Rio project at the University of
                 Michigan show that it is possible to create an area of
                 main memory that is as safe as disk from operating
                 system crashes. This paper explores how to integrate
                 the reliable memory provided by the Rio file cache into
                 a database system. Prior studies have analyzed the
                 performance benefits of reliable memory; we focus
                 instead on how different designs affect reliability. We
                 propose three designs for integrating reliable memory
                 into databases: non-persistent database buffer cache,
                 persistent database buffer cache, and persistent
                 database buffer cache with protection. Non-persistent
                 buffer caches use an I/O interface to reliable memory
                 and require the fewest modifications to existing
                 databases. However, they waste memory capacity and
                 bandwidth due to double buffering. Persistent buffer
                 caches use a memory interface to reliable memory by
                 mapping it into the database address space. This places
                 reliable memory under complete database control and
                 eliminates double buffering, but it may expose the
                 buffer cache to database errors. Our third design
                 reduces this exposure by write protecting the buffer
                 pages. Extensive fault tests show that mapping reliable
                 memory into the database address space does not
                 significantly hurt reliability. This is because wild
                 stores rarely touch dirty, committed pages written by
                 previous transactions. As a result, we believe that
                 databases should use a memory interface to reliable
                 memory.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "main memory database system (MMDB); recovery;
                 reliability",
}
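
A toy model of the paper's third design, the persistent buffer cache
with protection: pages stay write-protected except during sanctioned
writes, so a wild store faults instead of silently corrupting
committed data. A real system would use virtual-memory page
protection; the flag and the MemoryError below are illustrative
stand-ins for a hardware page fault.

class ProtectedPage:
    def __init__(self):
        self._data = {}
        self._writable = False

    def unprotect(self):
        self._writable = True   # database opens the page for a real write

    def protect(self):
        self._writable = False  # re-arm protection after the write

    def store(self, key, value):
        if not self._writable:
            raise MemoryError("wild store blocked: page is write-protected")
        self._data[key] = value

page = ProtectedPage()
page.unprotect(); page.store("balance", 100); page.protect()  # sanctioned
try:
    page.store("balance", 0)                                   # wild store
except MemoryError as e:
    print(e)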

@Article{Ozsu:1998:I,
  author =       "M. Tamer {\"O}zsu and Stavros Christodoulakis",
  title =        "Introduction",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "4",
  pages =        "205--205",
  month =        dec,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:48 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ozsu:1998:SIM,
  author =       "M. Tamer {\"O}zsu and Stavros Christodoulakis",
  title =        "Special Issue on Multimedia Databases: Introduction",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "4",
  pages =        "205--205",
  month =        dec,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 27 10:11:57 MDT 2000",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zsu:M=_Tamer.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Christodoulakis:Stavros.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007004/80070205.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007004/80070205.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Garofalakis:1998:PRS,
  author =       "Minos N. Garofalakis and Banu {\"O}zden and Avi
                 Silberschatz",
  title =        "On Periodic Resource scheduling for Continuous-Media
                 Databases",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "4",
  pages =        "206--225",
  month =        dec,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 27 10:11:57 MDT 2000",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zden:Banu.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Garofalakis:Minos_N=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Silberschatz:Abraham.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007004/80070206.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007004/80070206.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  xxauthor =     "Minos N. Garofalakis and Banu {\"O}zden and Abraham
                 Silberschatz",
}

@Article{Jiang:1998:STC,
  author =       "Haitao Jiang and Ahmed K. Elmagarmid",
  title =        "Spatial and Temporal Content-Based Access to
                 Hypervideo Databases",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "4",
  pages =        "226--238",
  month =        dec,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:48 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/e/Elmagarmid:Ahmed_K=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jiang:Haitao.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007004/80070226.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007004/80070226.pdf",
  abstract =     "Providing content-based video query, retrieval and
                 browsing is the most important goal of a video database
                 management system (VDBMS). Video data is unique not
                 only in terms of its spatial and temporal
                 characteristics, but also in the semantic associations
                 manifested by the entities present in the video. This
                 paper introduces a novel video data model called {\em
                 Logical Hypervideo Data Model}. In addition to
                 multilevel video abstractions, the model is capable of
                 representing video entities that users are interested
                 in (defined as {\em hot objects\/}) and their semantic
                 associations with other logical video abstractions,
                 including hot objects themselves. The semantic
                 associations are modeled as {\em video hyperlinks\/}
                 and video data with this property are called {\em
                 hypervideo}. Video hyperlinks provide a flexible and
                 effective way of browsing video data. Based on the
                 proposed model, video queries can be specified with
                 both temporal and spatial constraints, as well as with
                 semantic descriptions of the video data. The
                 characteristics of hot objects' spatial and temporal
                 relations and efficient evaluation of them are also
                 discussed. Some query examples are given to demonstrate
                 the expressiveness of the video data model and query
                 language. Finally, we describe a modular video database
                 system architecture that our web-based prototype is
                 based on.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "content-based query; hot object; hypervideo; spatial
                 and temporal constraint; video database",
}
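
A small sketch of querying "hot objects" under combined temporal and
spatial constraints, loosely after the data model described in this
abstract; the field names and the containment semantics are
assumptions made for illustration.

from typing import NamedTuple, Tuple

class HotObject(NamedTuple):
    name: str
    frames: Tuple[int, int]           # first and last frame of appearance
    bbox: Tuple[int, int, int, int]   # x1, y1, x2, y2 within the frame
    links: Tuple[str, ...] = ()       # video hyperlinks to other abstractions

def overlaps(a, b):
    return a[0] <= b[1] and b[0] <= a[1]

def inside(bbox, region):
    x1, y1, x2, y2 = bbox
    rx1, ry1, rx2, ry2 = region
    return rx1 <= x1 and ry1 <= y1 and x2 <= rx2 and y2 <= ry2

def query(objects, time_range, region):
    """Hot objects visible during time_range whose box lies in region."""
    return [o.name for o in objects
            if overlaps(o.frames, time_range) and inside(o.bbox, region)]

video = [
    HotObject("anchor", (0, 120), (10, 10, 60, 90), links=("clip-17",)),
    HotObject("logo", (0, 500), (200, 0, 240, 30)),
]
print(query(video, (100, 200), (0, 0, 100, 100)))  # -> ['anchor']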

@Article{Ng:1998:OCO,
  author =       "Raymond T. Ng and Paul Shum",
  title =        "Optimal Clip Ordering for Multi-Clip Queries",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "4",
  pages =        "239--252",
  month =        dec,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:48 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Ng:Raymond_T=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shum:Paul.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007004/80070239.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007004/80070239.pdf",
  abstract =     "A multi-clip query requests multiple video clips be
                 returned as the answer of the query. In many
                 applications and situations, the order in which these
                 clips are to be delivered does not matter that much to
                 the user. This allows the system ample opportunities to
                 optimize system throughput by using schedules that
                 maximize the effect of piggybacking. In this paper, we
                 study how to find such optimal schedules. In
                 particular, we consider two optimization criteria: (i)
                 one based on maximizing the number of piggybacked
                 clips, and (ii) the other based on maximizing the
                 impact on buffer space. We show that the optimal
                 schedule under the first criterion is equivalent to a
                 maximum matching in a suitably defined bipartite graph,
                 and that under the second criterion, the optimal
                 schedule is equivalent to a maximum matching in a
                 suitably defined weighted bipartite graph. Our
                 experimental results, which are based on realistic
                 distributions, indicate that both kinds of optimal
                 schedules can lead to a gain in throughput of over
                 300\%, yet the time taken to compute such an
                 optimal schedule is negligible. Finally, we show how to
                 deal with clips that are variable in length.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "admission control; bipartite graph matching;
                 performance of multimedia systems",
}
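
The abstract reduces schedule optimization to maximum bipartite
matching. Below is a generic augmenting-path matcher (Kuhn's
algorithm) on a toy instance; the graph construction that encodes
piggybacking opportunities is the paper's and is not reproduced here.

def max_bipartite_matching(adj, n_right):
    """adj[u] lists the right-side vertices that left vertex u may be
    matched to (e.g., pending clip requests a stream already in
    progress could piggyback). Returns (size, match_right)."""
    match_right = [-1] * n_right

    def try_kuhn(u, seen):
        # Look for an augmenting path starting at left vertex u.
        for v in adj[u]:
            if v not in seen:
                seen.add(v)
                if match_right[v] == -1 or try_kuhn(match_right[v], seen):
                    match_right[v] = u
                    return True
        return False

    matched = 0
    for u in range(len(adj)):
        if try_kuhn(u, set()):
            matched += 1
    return matched, match_right

# Toy instance: 3 left vertices, 3 right vertices.
adj = [[0, 1], [0], [1, 2]]
print(max_bipartite_matching(adj, 3))  # -> (3, [1, 0, 2])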

@Article{Soffer:1998:ISI,
  author =       "Aya Soffer and Hanan Samet",
  title =        "Integrating Symbolic Images into a Multimedia Database
                 System Using Classification and Abstraction
                 Approaches",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "4",
  pages =        "253--274",
  month =        dec,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:48 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Samet:Hanan.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Soffer:Aya.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007004/80070253.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007004/80070253.pdf",
  abstract =     "Symbolic images are composed of a finite set of
                 symbols that have a semantic meaning. Examples of
                 symbolic images include maps (where the semantic
                 meaning of the symbols is given in the legend),
                 engineering drawings, and floor plans. Two approaches
                 for supporting queries on symbolic-image databases that
                 are based on image content are studied. The
                 classification approach preprocesses all symbolic
                 images and attaches a semantic classification and an
                 associated certainty factor to each object that it
                 finds in the image. The abstraction approach describes
                 each object in the symbolic image by using a vector
                 consisting of the values of some of its features (e.g.,
                 shape, genus, etc.). The approaches differ in the way
                 in which responses to queries are computed. In the
                 classification approach, images are retrieved on the
                 basis of whether or not they contain objects that have
                 the same classification as the objects in the query. On
                 the other hand, in the abstraction approach, retrieval
                 is on the basis of similarity of feature vector values
                 of these objects. Methods of integrating these two
                 approaches into a relational multimedia database
                 management system so that symbolic images can be stored
                 and retrieved based on their content are described.
                 Schema definitions and indices that support query
                 specifications involving spatial as well as contextual
                 constraints are presented. Spatial constraints may be
                 based on both locational information (e.g., distance)
                 and relational information (e.g., north of). Different
                 strategies for image retrieval for a number of typical
                 queries using these approaches are described. Estimated
                 costs are derived for these strategies. Results are
                 reported of a comparative study of the two approaches
                 in terms of image insertion time, storage space,
                 retrieval accuracy, and retrieval time.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "image indexing; multimedia databases; query
                 optimization; retrieval by content; spatial databases;
                 symbolic-image databases",
}
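
A compact illustration of the two retrieval styles this paper
compares, with made-up certainty factors and two-dimensional feature
vectors; real feature vectors would carry shape, genus, and similar
properties.

import math

# Each image object carries both a semantic classification (with a
# certainty factor) and a feature vector, mirroring the two approaches.
images = {
    "map1": [("hotel", 0.9, (0.2, 0.7)), ("beach", 0.6, (0.8, 0.1))],
    "map2": [("hotel", 0.5, (0.3, 0.6))],
}

def retrieve_by_class(images, wanted, min_certainty=0.7):
    """Classification approach: match on pre-assigned labels."""
    return [img for img, objs in images.items()
            if any(lbl == wanted and cf >= min_certainty
                   for lbl, cf, _ in objs)]

def retrieve_by_features(images, query_vec, eps=0.2):
    """Abstraction approach: match on feature-vector proximity."""
    return [img for img, objs in images.items()
            if any(math.dist(vec, query_vec) <= eps for _, _, vec in objs)]

print(retrieve_by_class(images, "hotel"))          # -> ['map1']
print(retrieve_by_features(images, (0.25, 0.65)))  # -> ['map1', 'map2']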

@Article{Zezula:1998:ASR,
  author =       "Pavel Zezula and Pasquale Savino and Giuseppe Amato
                 and Fausto Rabitti",
  title =        "Approximate Similarity Retrieval with {M}-Trees",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "4",
  pages =        "275--293",
  month =        dec,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:48 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Amato:Giuseppe.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Rabitti:Fausto.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Savino:Pasquale.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zezula:Pavel.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007004/80070275.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007004/80070275.pdf",
  abstract =     "Motivated by the urgent need to improve the efficiency
                 of similarity queries, approximate similarity retrieval
                 is investigated in the environment of a metric tree
                 index called the M-tree. Three different approximation
                 techniques are proposed, which show how to forsake
                 query precision for improved performance. Measures are
                 defined that can quantify the improvements in
                 performance efficiency and the quality of
                 approximations. The proposed approximation techniques
                 are then tested on various synthetic and real-life
                 files. The evidence obtained from the experiments
                 confirms our hypothesis that a high-quality
                 approximated similarity search can be performed at a
                 much lower cost than that needed to obtain the exact
                 results. The proposed approximation techniques are
                 scalable and appear to be independent of the metric
                 used. Extensions of these techniques to the
                 environments of other similarity search indexes are
                 also discussed.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access structures; approximation algorithms; distance
                 only data; performance evaluation; similarity search",
}
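
The M-tree internals are beyond a short sketch, but the core trade
the abstract describes, forsaking exactness for speed, can be
illustrated by bounding the work and stopping early once a "good
enough" neighbor appears; the data set and thresholds below are
arbitrary.

import math, random

random.seed(7)
data = [(random.random(), random.random()) for _ in range(10000)]

def exact_nn(q):
    # Ground truth: scan everything.
    return min(data, key=lambda p: math.dist(p, q))

def approx_nn(q, budget=1000, good_enough=0.01):
    # Visit at most `budget` points; stop as soon as a candidate
    # closer than `good_enough` turns up. Precision traded for cost.
    best_d = float("inf")
    for p in random.sample(data, budget):
        d = math.dist(p, q)
        if d < best_d:
            best_d = d
            if best_d <= good_enough:
                break
    return best_d

q = (0.5, 0.5)
print("exact :", math.dist(exact_nn(q), q))
print("approx:", approx_nn(q))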

@Article{Balkir:1998:DPM,
  author =       "Nevzat Hurkan Balkir and Gultekin {\"O}zsoyoglu",
  title =        "Delivering Presentations from Multimedia Servers",
  journal =      j-VLDB-J,
  volume =       "7",
  number =       "4",
  pages =        "294--307",
  month =        dec,
  year =         "1998",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:48 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb7.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t8007004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/=/=Ouml=zsoyoglu:Gultekin.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Balkir:Nevzat_Hurkan.html;
                 http://link.springer.de/link/service/journals/00778/bibs/8007004/80070294.htm;
                 http://link.springer.de/link/service/journals/00778/papers/8007004/80070294.pdf",
  abstract =     "Most multimedia servers reported in the literature are
                 designed to serve multiple and independent video/audio
                 streams. We think that, in future, multimedia servers
                 will also serve complete presentations. Multimedia
                 presentations provide unique opportunities to develop
                 algorithms for buffer management and admission control,
                 as execution-time consumption requirements of
                 presentations are known a priori. In this paper, we
                 examine presentations in three different domains
                 (heavyweight, middleweight, and lightweight) and
                 provide buffer management and admission control
                 algorithms for the three domains. We propose two
                 improvements (flattening and dynamic-adjustments) on
                 the schedules created for the heavyweight
                 presentations. Results from a simulation environment
                 are presented.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "admission control; buffer management; flattening;
                 multimedia presentations",
}
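
One plausible reading of "flattening" a presentation schedule, shown
here as bandwidth smoothing: because consumption is known a priori,
the server can prefetch into the buffer and play the presentation at
the smallest feasible constant rate. An illustrative simplification,
not the paper's exact algorithm.

def flatten(demand):
    """Replace a bursty per-slot bandwidth demand with the smallest
    constant rate that, with prefetching into a buffer, never starves
    the presentation. Returns (rate, peak_buffer)."""
    # Smallest feasible constant rate: the maximum prefix average.
    rate = 0.0
    for t in range(1, len(demand) + 1):
        rate = max(rate, sum(demand[:t]) / t)
    # Buffer needed to hold prefetched but not yet consumed data.
    peak, backlog = 0.0, 0.0
    for d in demand:
        backlog = max(0.0, backlog + rate - d)
        peak = max(peak, backlog)
    return rate, peak

print(flatten([1, 2, 8, 1, 8]))  # peak demand 8 -> (4.0, 5.0)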

@Article{Li:1999:FJU,
  author =       "Zhe Li and Kenneth A. Ross",
  title =        "Fast Joins Using Join Indices",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "1",
  pages =        "1--24",
  month =        apr,
  year =         "1999",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:49 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t9008001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Li:Zhe.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Ross:Kenneth_A=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/9008001/90080001.htm;
                 http://link.springer.de/link/service/journals/00778/papers/9008001/90080001.pdf",
  abstract =     "Two new algorithms, ``Jive join'' and ``Slam join,''
                 are proposed for computing the join of two relations
                 using a join index. The algorithms are duals: Jive join
                 range-partitions input relation tuple ids and then
                 processes each partition, while Slam join forms ordered
                 runs of input relation tuple ids and then merges the
                 results. Both algorithms make a single sequential pass
                 through each input relation, in addition to one pass
                 through the join index and two passes through a
                 temporary file, whose size is half that of the join
                 index. Both algorithms require only that the number of
                 blocks in main memory is of the order of the square
                 root of the number of blocks in the smaller relation.
                 By storing intermediate and final join results in a
                 vertically partitioned fashion, our algorithms need to
                 manipulate less data in memory at a given time than
                 other algorithms. The algorithms are resistant to data
                 skew and adaptive to memory fluctuations. Selection
                 conditions can be incorporated into the algorithms.
                 Using a detailed cost model, the algorithms are
                 analyzed and compared with competing algorithms. For
                 large input relations, our algorithms perform
                 significantly better than Valduriez's algorithm, the
                 TID join algorithm, and hash join algorithms. An
                 experimental study is also conducted to validate the
                 analytical results and to demonstrate the performance
                 characteristics of each algorithm in practice.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "decision support systems; query processing",
  remark =       "Check month: April or May??",
}
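
A toy rendition of the Jive-join idea: route join-index entries into
tuple-id ranges so that each partition reads one contiguous slice of
the inner relation sequentially. Buffering, the vertically
partitioned result layout, and the cost model are all omitted.

def jive_join(R, S, join_index, n_parts=2):
    # Phase 1: range-partition the join index by S tuple id, so each
    # partition touches one contiguous slice of S (sequential I/O).
    parts = [[] for _ in range(n_parts)]
    width = -(-len(S) // n_parts)       # ceiling division
    for rid, sid in join_index:
        parts[sid // width].append((rid, sid))
    # Phase 2: per partition, read the needed S slice once, then emit
    # result tuples by probing R and the slice.
    out = []
    for p, chunk in enumerate(parts):
        lo = p * width
        s_slice = S[lo:lo + width]      # one sequential read of S
        for rid, sid in chunk:
            out.append(R[rid] + s_slice[sid - lo])
    return out

R = [("r0", 1), ("r1", 2), ("r2", 1)]
S = [("s0",), ("s1",), ("s2",), ("s3",)]
ji = [(0, 3), (1, 0), (2, 2)]           # precomputed (rid, sid) pairs
print(jive_join(R, S, ji))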

@Article{Harder:1999:IPS,
  author =       "Theo H{\"a}rder and G{\"u}nter Sauter and Joachim
                 Thomas",
  title =        "The Intrinsic Problems of Structural Heterogeneity and
                 an Approach to Their Solution",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "1",
  pages =        "25--43",
  month =        apr,
  year =         "1999",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:49 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t9008001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/H=auml=rder:Theo.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sauter:G=uuml=nter.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Thomas:Joachim.html;
                 http://link.springer.de/link/service/journals/00778/bibs/9008001/90080025.htm;
                 http://link.springer.de/link/service/journals/00778/papers/9008001/90080025.pdf",
  abstract =     "This paper focuses on the problems that arise when
                 integrating data from heterogeneous sources in a
                 single, unified database view. At first, we give a
                 detailed analysis of the kinds of structural
                 heterogeneity that occur when unified views are derived
                 from different database systems. We present the results
                 in a multiple tier architecture which distinguishes
                 different levels of heterogeneity and relates them to
                 their underlying causes as well as to the mapping
                 conflicts resulting from the view derivation process.
                 As the second essential contribution, the paper
                 presents our approach to a mapping language solving the
                 identified conflicts. The main characteristics of the
                 language are its descriptiveness, its capability to map
                 between schemas written in the relational,
                 object-oriented, ER, or EXPRESS data model, and its
                 facilities for specifying user-defined update
                 operations on the view that are to be propagated to the
                 data sources. Finally, we briefly discuss how this
                 mapping information is employed to convert queries
                 formulated with respect to the integrated view, into
                 database operations over the heterogeneous data
                 sources.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "heterogeneity; legacy systems; mapping language;
                 schema integration; schema mapping; updatable views",
  remark =       "Check month: April or May??",
}
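
One structural conflict from the paper's problem analysis, the same
fact modeled as an attribute in one source but as a separate relation
in another, folded into a unified view. The hand-written Python below
stands in for what a declarative mapping language would specify; the
schemas are invented.

# Source A stores the phone number as an attribute of person;
# source B factors it out into its own relation.
src_a = {"person": [{"id": 1, "phone": "555-01"}]}
src_b = {"person": [{"id": 2}],
         "phone": [{"person_id": 2, "nr": "555-02"}]}

def unified_view():
    """Fold both structures into one view relation person(id, phone)."""
    rows = [(p["id"], p["phone"]) for p in src_a["person"]]
    nums = {r["person_id"]: r["nr"] for r in src_b["phone"]}
    rows += [(p["id"], nums.get(p["id"])) for p in src_b["person"]]
    return rows

print(unified_view())  # -> [(1, '555-01'), (2, '555-02')]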

@Article{Huang:1999:CTP,
  author =       "Yueh-Min Huang and Jen-Wen Ding and Shiao-Li Tsao",
  title =        "Constant Time Permutation: An Efficient Block
                 Allocation Strategy for Variable-Bit-Rate Continuous
                 Media Data",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "1",
  pages =        "44--54",
  month =        apr,
  year =         "1999",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:49 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t9008001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/Ding:Jen=Wen.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Huang:Yueh=Min.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tsao:Shiao=Li.html;
                 http://link.springer.de/link/service/journals/00778/bibs/9008001/90080044.htm;
                 http://link.springer.de/link/service/journals/00778/papers/9008001/90080044.pdf",
  abstract =     "To provide high accessibility of continuous-media (CM)
                 data, CM servers generally stripe data across multiple
                 disks. Currently, the most widely used striping scheme
                 for CM data is round-robin permutation (RRP).
                 Unfortunately, when RRP is applied to variable-bit-rate
                 (VBR) CM data, load imbalance across multiple disks
                 occurs, thereby reducing overall system performance. In
                 this paper, the performance of a VBR CM server with RRP
                 is analyzed. In addition, we propose an efficient
                 striping scheme called constant time permutation (CTP),
                 which takes the VBR characteristic into account and
                 obtains a more balanced load than RRP. Analytic models
                 of both RRP and CTP are presented, and the models are
                 verified via trace-driven simulations. Analysis and
                 simulation results show that CTP can substantially
                 increase the number of clients supported, though it
                 might introduce a few seconds or minutes of initial
                 delay.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "continuous-media server; data placement; load
                 balancing; striping; video-on-demand (VOD)",
  remark =       "Check month: April or May??",
}
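
A quick simulation of why round-robin permutation (RRP) balances
poorly for variable-bit-rate blocks, next to a size-aware placement
for contrast. The greedy policy below is only a foil; it is not the
paper's constant time permutation.

import random

random.seed(1)
D = 4
blocks = [random.randint(2, 12) for _ in range(40)]  # VBR block sizes

def load(assignment):
    per_disk = [0] * D
    for disk, size in assignment:
        per_disk[disk] += size
    return per_disk

# Round-robin: block i goes to disk i mod D, regardless of its size.
rrp = [(i % D, b) for i, b in enumerate(blocks)]

# Size-aware foil: largest remaining block to the lightest disk.
greedy, per = [], [0] * D
for b in sorted(blocks, reverse=True):
    d = per.index(min(per))
    greedy.append((d, b))
    per[d] += b

print("round-robin:", load(rrp))
print("size-aware :", load(greedy))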

@Article{Kabra:1999:OOO,
  author =       "Navin Kabra and David J. DeWitt",
  title =        "{OPT++}: an object-oriented implementation for
                 extensible database query optimization",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "1",
  pages =        "55--78",
  month =        apr,
  year =         "1999",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:49 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t9008001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/d/DeWitt:David_J=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kabra:Navin.html;
                 http://link.springer.de/link/service/journals/00778/bibs/9008001/90080055.htm;
                 http://link.springer.de/link/service/journals/00778/papers/9008001/90080055.pdf",
  abstract =     "In this paper we describe the design and
                 implementation of OPT++, a tool for extensible database
                 query optimization that uses an object-oriented design
                 to simplify the task of implementing, extending, and
                 modifying an optimizer. Building an optimizer using
                 OPT++ makes it easy to extend the query algebra (to add
                 new query algebra operators and physical implementation
                 algorithms to the system), easy to change the search
                 space, and also to change the search strategy.
                 Furthermore, OPT++ comes equipped with a number of
                 search strategies that are available for use by an
                 optimizer-implementor. OPT++ considerably simplifies
                 both the task of implementing an optimizer for a new
                 database system and the task of evaluating alternative
                 optimization techniques and strategies to decide what
                 techniques are best suited for that database system. We
                 present the results of a series of performance studies.
                 These results validate our design and show that, in
                 spite of its flexibility, OPT++ can be used to build
                 efficient optimizers.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "extensibility; object-relational databases; query
                 optimization; software architecture",
  remark =       "Check month: April or May??",
}
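
A skeleton of the object-oriented optimizer design the abstract
describes: the algebra, the transformation rules, and the search
strategy are separate, independently extensible pieces. Class and
rule names are invented for illustration.

class Operator:
    """Logical algebra node; subclass to extend the query algebra."""
    def __init__(self, *children):
        self.children = list(children)

class Scan(Operator):
    def __init__(self, table):
        super().__init__()
        self.table = table

class Join(Operator):
    pass

def sig(p):
    # Structural signature used to memoize explored plans.
    if isinstance(p, Scan):
        return ("Scan", p.table)
    return (type(p).__name__,) + tuple(sig(c) for c in p.children)

def commute_join(p):
    # One transformation rule; adding rules changes the search space.
    if isinstance(p, Join):
        a, b = p.children
        yield Join(b, a)

def exhaustive(plan, rules, cost):
    """Pluggable search strategy: explore the rule closure, keep the
    cheapest plan. Swapping this function changes the search strategy
    without touching the algebra or the rules."""
    seen = {sig(plan): plan}
    frontier = [plan]
    while frontier:
        p = frontier.pop()
        for rule in rules:
            for q in rule(p):
                if sig(q) not in seen:
                    seen[sig(q)] = q
                    frontier.append(q)
    return min(seen.values(), key=cost)

sizes = {"A": 1000, "B": 10}

def cost(p):
    # Toy cost model: large outer inputs are penalized.
    if isinstance(p, Scan):
        return sizes[p.table]
    left, right = (cost(c) for c in p.children)
    return 2 * left + right

best = exhaustive(Join(Scan("A"), Scan("B")), [commute_join], cost)
print(sig(best))  # -> ('Join', ('Scan', 'B'), ('Scan', 'A'))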

@Article{Krivokapic:1999:DDD,
  author =       "Natalija Krivokapi{\'c} and Alfons Kemper and Ehud
                 Gudes",
  title =        "Deadlock Detection in Distributed Database Systems:
                 a New Algorithm and a Comparative Performance
                 Analysis",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "2",
  pages =        "79--100",
  month =        apr,
  year =         "1999",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:50 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t9008002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gudes:Ehud.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemper:Alfons.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Krivokapic:Natalija.html;
                 http://link.springer.de/link/service/journals/00778/bibs/9008002/90080079.htm;
                 http://link.springer.de/link/service/journals/00778/papers/9008002/90080079.pdf",
  abstract =     "This paper attempts a comprehensive study of deadlock
                 detection in distributed database systems. First, the
                 two predominant deadlock models in these systems and
                 the four different distributed deadlock detection
                 approaches are discussed. Afterwards, a new deadlock
                 detection algorithm is presented. The algorithm is
                 based on dynamically creating {\em deadlock detection
                 agents\/} (DDAs), each being responsible for detecting
                 deadlocks in one connected component of the global
                 wait-for-graph (WFG). The DDA scheme is a
                 ``self-tuning'' system: after an initial warm-up phase,
                 dedicated DDAs will be formed for ``centers of
                 locality'', i.e., parts of the system where many
                 conflicts occur. A dynamic shift in locality of the
                 distributed system will be responded to by
                 automatically creating new DDAs while the obsolete ones
                 terminate. In this paper, we also compare the most
                 competitive representative of each class of algorithms
                 suitable for distributed database systems based on a
                 simulation model, and point out their relative
                 strengths and weaknesses. The extensive experiments we
                 carried out indicate that our newly proposed deadlock
                 detection algorithm outperforms the other algorithms in
                 the vast majority of configurations and workloads and,
                 in contrast to all other algorithms, is very robust
                 with respect to differing load and access profiles.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "comparative performance analysis; deadlock detection;
                 distributed database systems; simulation study",
  remark =       "Check month: April or May??",
}
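
The common core of WFG-based detection, as a depth-first cycle search
over a toy wait-for graph. In the DDA scheme each connected component
would get its own detector agent and the graph would be maintained in
a distributed fashion; none of that is modeled here.

from collections import defaultdict

def find_deadlock(waits_for):
    """Return one cycle of the wait-for graph, or None."""
    WHITE, GREY, BLACK = 0, 1, 2
    color = defaultdict(int)   # all transactions start WHITE
    stack = []

    def dfs(t):
        color[t] = GREY
        stack.append(t)
        for u in waits_for.get(t, ()):
            if color[u] == GREY:             # back edge -> deadlock
                return stack[stack.index(u):]
            if color[u] == WHITE:
                cycle = dfs(u)
                if cycle:
                    return cycle
        color[t] = BLACK
        stack.pop()
        return None

    for t in list(waits_for):
        if color[t] == WHITE:
            cycle = dfs(t)
            if cycle:
                return cycle
    return None

wfg = {"T1": ["T2"], "T2": ["T3"], "T3": ["T1"], "T4": ["T2"]}
print(find_deadlock(wfg))  # -> ['T1', 'T2', 'T3']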

@Article{Boncz:1999:MPQ,
  author =       "Peter A. Boncz and Martin L. Kersten",
  title =        "{MIL} primitives for querying a fragmented world",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "2",
  pages =        "101--119",
  month =        apr,
  year =         "1999",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:50 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t9008002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Boncz:Peter_A=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kersten:Martin_L=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/9008002/90080101.htm;
                 http://link.springer.de/link/service/journals/00778/papers/9008002/90080101.pdf",
  abstract =     "In query-intensive database application areas, like
                 decision support and data mining, systems that use
                 vertical fragmentation have a significant performance
                 advantage. In order to support relational or
                 object-oriented applications on top of such a fragmented data
                 model, a flexible yet powerful intermediate language is
                 needed. This problem has been successfully tackled in
                 Monet, a modern extensible database kernel developed by
                 our group. We focus on the design choices made in the
                 Monet interpreter language (MIL), its algebraic query
                 language, and outline how its concept of tactical
                 optimization enhances and simplifies the optimization
                 of complex queries. Finally, we summarize the
                 experience gained in Monet by creating a highly
                 efficient implementation of MIL.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database systems; main-memory techniques; query
                 languages; query optimization; vertical fragmentation",
  remark =       "Check month: April or May??",
}
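
Monet's vertically fragmented model stores each attribute as a binary
association table (BAT); a few MIL-flavored operators over Python
pairs convey the style, with names and semantics simplified for
illustration.

# A BAT (binary association table) is a sequence of (head, tail)
# pairs; vertical fragmentation stores each attribute as its own BAT.
name = [(1, "ann"), (2, "bob"), (3, "cay")]   # oid -> name
age = [(1, 34), (2, 27), (3, 41)]             # oid -> age

def bat_select(bat, pred):
    # Keep pairs whose tail satisfies the predicate.
    return [(h, t) for h, t in bat if pred(t)]

def bat_mirror(bat):
    # (h, t) -> (h, h): turns a selection result back into oid form.
    return [(h, h) for h, _ in bat]

def bat_join(left, right):
    # Match tails of `left` against heads of `right`.
    idx = {}
    for h, t in right:
        idx.setdefault(h, []).append(t)
    return [(h, t2) for h, t in left for t2 in idx.get(t, ())]

# "names of people older than 30": each step is one bulk operator,
# which is what makes per-operator (tactical) optimization possible.
old = bat_select(age, lambda a: a > 30)       # [(1, 34), (3, 41)]
print(bat_join(bat_mirror(old), name))        # -> [(1, 'ann'), (3, 'cay')]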

@Article{Aslan:1999:SHR,
  author =       "Goksel Aslan and Dennis McLeod",
  title =        "Semantic Heterogeneity Resolution in Federated
                 Databases by Metadata Implantation and Stepwise
                 Evolution",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "2",
  pages =        "120--132",
  month =        apr,
  year =         "1999",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:50 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t9008002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Aslan:Goksel.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/McLeod:Dennis.html;
                 http://link.springer.de/link/service/journals/00778/bibs/9008002/90080120.htm;
                 http://link.springer.de/link/service/journals/00778/papers/9008002/90080120.pdf",
  abstract =     "A key aspect of interoperation among data-intensive
                 systems involves the mediation of metadata and
                 ontologies across database boundaries. One way to
                 achieve such mediation between a local database and a
                 remote database is to fold remote metadata into the
                 local metadata, thereby creating a common platform
                 through which information sharing and exchange becomes
                 possible. Schema implantation and semantic evolution,
                 our approach to the metadata folding problem, is a
                 partial database integration scheme in which remote and
                 local (meta)data are integrated in a stepwise manner
                 over time. We introduce metadata implantation and
                 stepwise evolution techniques to interrelate database
                 elements in different databases, and to resolve
                 conflicts on the structure and semantics of database
                 elements (classes, attributes, and individual
                 instances). We employ a semantically rich canonical
                 data model, and an incremental integration and semantic
                 heterogeneity resolution scheme. In our approach,
                 relationships between local and remote information
                 units are determined whenever enough knowledge about
                 their semantics is acquired. The metadata folding
                 problem is solved by implanting remote database
                 elements into the local database, a process that
                 imports remote database elements into the local
                 database environment, hypothesizes the relevance of
                 local and remote classes, and customizes the
                 organization of remote metadata. We have implemented a
                 prototype system and demonstrated its use in an
                 experimental neuroscience environment.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database integration; database interoperability;
                 federated databases; schema evolution; semantic
                 heterogeneity resolution",
  remark =       "Check month: April or May??",
}

@Article{Law:1999:ESI,
  author =       "Kelvin K. W. Law and John C. S. Lui and Leana
                 Golubchik",
  title =        "Efficient Support for Interactive Service in
                 Multi-Resolution {VOD} Systems",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "2",
  pages =        "133--153",
  month =        apr,
  year =         "1999",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:50 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t9008002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Golubchik:Leana.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Law:Kelvin_K=_W=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lui:John_C=_S=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/9008002/90080133.htm;
                 http://link.springer.de/link/service/journals/00778/papers/9008002/90080133.pdf",
  abstract =     "Advances in high-speed networks and multimedia
                 technologies have made it feasible to provide
                 video-on-demand (VOD) services to users. However, it is
                 still a challenging task to design a cost-effective VOD
                 system that can support a large number of clients (who
                 may have different quality of service (QoS)
                 requirements) and, at the same time, provide different
                 types of VCR functionalities. Although it has been
                 recognized that VCR operations are important
                 functionalities in providing VOD service, techniques
                 proposed in the past for providing VCR operations may
                 require additional system resources, such as extra disk
                 I/O, additional buffer space, as well as network
                 bandwidth. In this paper, we consider the design of a
                 VOD storage server that has the following features: (1)
                 provision of different levels of display resolutions to
                 users who have different QoS requirements, (2)
                 provision of different types of VCR functionalities,
                 such as fast forward and rewind, without imposing
                 additional demand on the system buffer space, I/O
                 bandwidth, and network bandwidth, and (3) guarantees of
                 the load-balancing property across all disks during
                 normal and VCR display periods. The above-mentioned
                 features are especially important because they simplify
                 the design of the buffer space, I/O, and network
                 resource allocation policies of the VOD storage system.
                 The load-balancing property also ensures that no single
                 disk will be the bottleneck of the system. In this
                 paper, we propose data block placement, admission
                 control, and I/O-scheduling algorithms, as well as
                 determine the corresponding buffer space requirements
                 of the proposed VOD storage system. We show that the
                 proposed VOD system can provide VCR and
                 multi-resolution services to the viewing clients and at
                 the same time maintain the load-balancing property.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "interactive services; multi-resolution services;
                 multimedia servers; VOD systems",
  remark =       "Check month: April or May??",
}

@Article{Shmueli:2000:FVP,
  author =       "O. Shmueli and J. Widom",
  title =        "Foreword by the {VLDB} `98 {PC Chairmen}",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "155--155",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Widom:2000:BPV,
  author =       "Jennifer Widom and Oded Shmueli",
  title =        "Best Papers of {VLDB `98, New York: Foreword by the
                 VLDB `98 PC Chairmen: Best Papers of VLDB `98}",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "155--155",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 27 10:11:55 MDT 2000",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Shmueli:Oded.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Widom:Jennifer.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080155.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080155.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  xxauthor =     "O. Shmueli and J. Widom",
}

@Article{Braumandl:2000:FJP,
  author =       "Reinhard Braumandl and Jens Clau{\ss}en and Alfons
                 Kemper and Donald Kossmann",
  title =        "Functional-Join Processing",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "156--177",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Braumandl:Reinhard.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Clau=szlig=en:Jens.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kemper:Alfons.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kossmann:Donald.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080156.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080156.pdf",
  abstract =     "Inter-object references are one of the key concepts of
                 object-relational and object-oriented database systems.
                 In this work, we investigate alternative techniques to
                 implement inter-object references and make the best use
                 of them in query processing, i.e., in evaluating
                 functional joins. We will give a comprehensive overview
                 and performance evaluation of all known techniques for
                 simple (single-valued) as well as multi-valued
                 functional joins. Furthermore, we will describe special
                 {\em order-preserving\/} functional-join techniques
                 that are particularly attractive for decision support
                 queries that require ordered results. While most of the
                 presentation of this paper is focused on
                 object-relational and object-oriented database systems,
                 some of the results can also be applied to plain
                 relational databases because {\em index nested-loop
                 joins\/} along key/foreign-key relationships, as they
                 are frequently found in relational databases, are just
                 one particular way to execute a functional join.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "functional join; logical OID; object identifier;
                 order-preserving join; physical OID; pointer join;
                 query processing",
}
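
%%% The abstract above reduces one flavor of functional join to an
%%% index nested-loop join along a key/foreign-key relationship.  A
%%% minimal Python sketch of that idea; all data, field names, and the
%%% helper function are invented here for illustration:
%%%
%%%   # Single-valued functional join: dereference each outer tuple's
%%%   # OID through a lookup table, much as an index nested-loop join
%%%   # probes an index on the referenced key.
%%%   orders = [
%%%       {"order_id": 1, "customer_oid": "c1", "total": 40.0},
%%%       {"order_id": 2, "customer_oid": "c2", "total": 15.5},
%%%   ]
%%%   customers = {  # OID -> object, i.e., the probed "index"
%%%       "c1": {"name": "Ada"},
%%%       "c2": {"name": "Grace"},
%%%   }
%%%
%%%   def functional_join(outer, ref_attr, index):
%%%       for row in outer:
%%%           target = index.get(row[ref_attr])
%%%           if target is not None:      # skip dangling references
%%%               yield {**row, **target}
%%%
%%%   for joined in functional_join(orders, "customer_oid", customers):
%%%       print(joined)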

@Article{George:2000:SBF,
  author =       "Binto George and Jayant R. Haritsa",
  title =        "Secure Buffering in Firm Real-Time Database Systems",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "178--198",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/George:Binto.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/h/Haritsa:Jayant_R=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080178.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080178.pdf",
  abstract =     "Many real-time database applications arise in
                 electronic financial services, safety-critical
                 installations and military systems where enforcing
                 security is crucial to the success of the enterprise. We
                 investigate here the performance implications, in terms
                 of killed transactions, of guaranteeing {\em
                 multi-level secrecy\/} in a real-time database system
                 supporting applications with {\em firm\/} deadlines. In
                 particular, we focus on the {\em buffer management\/}
                 aspects of this issue. Our main contributions are the
                 following. First, we identify the importance and
                 difficulties of providing secure buffer management in
                 the real-time database environment. Second, we present
                 SABRE, a novel buffer management algorithm that provides
                 {\em covert-channel-free\/} security. SABRE employs a
                 fully dynamic one-copy allocation policy for efficient
                 usage of buffer resources. It also incorporates several
                 optimizations for reducing the overall number of killed
                 transactions and for decreasing the unfairness in the
                 distribution of killed transactions across security
                 levels. Third, using a detailed simulation model, the
                 real-time performance of SABRE is evaluated against
                 unsecure conventional and real-time buffer management
                 policies for a variety of security-classified
                 transaction workloads and system configurations. Our
                 experiments show that SABRE provides security with only
                 a modest drop in real-time performance. Finally, we
                 evaluate SABRE's performance when augmented with the
                 GUARD adaptive admission control policy. Our
                 experiments show that this combination provides close
                 to ideal fairness for real-time applications that can
                 tolerate covert-channel bandwidths of up to one bit per
                 second (a limit specified in military standards).",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "buffer management; covert channels; firm deadlines;
                 real-time database",
}

@Article{Muth:2000:LLS,
  author =       "Peter Muth and Patrick E. O'Neil and Achim Pick and
                 Gerhard Weikum",
  title =        "The {LHAM} Log-Structured History Data Access Method",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "199--221",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Muth:Peter.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/O=Neil:Patrick_E=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Pick:Achim.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/w/Weikum:Gerhard.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080199.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080199.pdf",
  abstract =     "Numerous applications such as stock market or medical
                 information systems require that both historical and
                 current data be logically integrated into a temporal
                 database. The underlying access method must support
                 different forms of ``time-travel'' queries, the
                 migration of old record versions onto inexpensive
                 archive media, and high insertion and update rates.
                 This paper presents an access method for
                 transaction-time temporal data, called the
                 log-structured history data access method (LHAM) that
                 meets these demands. The basic principle of LHAM is to
                 partition the data into successive components based on
                 the timestamps of the record versions. Components are
                 assigned to different levels of a storage hierarchy,
                 and incoming data is continuously migrated through the
                 hierarchy. The paper discusses the LHAM concepts,
                 including concurrency control and recovery, our
                 full-fledged LHAM implementation, and experimental
                 performance results based on this implementation. A
                 detailed comparison with the TSB-tree, both
                 analytically and based on experiments with real
                 implementations, shows that LHAM is highly superior in
                 terms of insert performance, while query performance is
                 in almost all cases at least as good as for the
                 TSB-tree; in many cases it is much better.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data warehouses; index structures; performance;
                 storage systems; temporal databases",
}
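
%%% A hypothetical Python sketch of the LHAM principle summarized
%%% above (not the paper's implementation): incoming record versions
%%% enter the newest component, and when a component overflows, its
%%% oldest versions migrate to the next level of the storage
%%% hierarchy.  Capacities and data are invented:
%%%
%%%   CAPACITY = [2, 4, 8]           # per-level capacities (invented)
%%%   components = [[], [], []]      # level 0 = memory, 2 = archive
%%%
%%%   def insert_version(key, ts, value):
%%%       components[0].append((ts, key, value))
%%%       for lvl in range(len(components) - 1):
%%%           if len(components[lvl]) > CAPACITY[lvl]:
%%%               components[lvl].sort()   # order versions by timestamp
%%%               spill = len(components[lvl]) - CAPACITY[lvl]
%%%               # migrate the oldest versions down one level
%%%               components[lvl + 1].extend(components[lvl][:spill])
%%%               del components[lvl][:spill]
%%%
%%%   for t in range(10):
%%%       insert_version("k%d" % (t % 3), t, "v%d" % t)
%%%   print([len(c) for c in components])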

@Article{Gibson:2000:CCD,
  author =       "David Gibson and Jon M. Kleinberg and Prabhakar
                 Raghavan",
  title =        "Clustering Categorical Data: An Approach Based on
                 Dynamical Systems",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "222--236",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gibson:David.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kleinberg:Jon_M=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Raghavan:Prabhakar.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080222.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080222.pdf",
  abstract =     "We describe a novel approach for clustering
                 collections of sets, and its application to the
                 analysis and mining of categorical data. By
                 ``categorical data,'' we mean tables with fields that
                 cannot be naturally ordered by a metric --- e.g., the
                 names of producers of automobiles, or the names of
                 products offered by a manufacturer. Our approach is
                 based on an iterative method for assigning and
                 propagating weights on the categorical values in a
                 table; this facilitates a type of similarity measure
                 arising from the co-occurrence of values in the
                 dataset. Our techniques can be studied analytically in
                 terms of certain types of non-linear dynamical
                 systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "categorical data; clustering; data mining; dynamical
                 systems; hypergraphs",
}
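
%%% A hypothetical Python sketch of the iterative weight propagation
%%% the abstract describes: every categorical value carries a weight;
%%% one iteration replaces each value's weight with the sum, over the
%%% tuples containing it, of its co-occurring values' weights, then
%%% renormalizes.  The toy table is invented:
%%%
%%%   import math
%%%
%%%   tuples = [("honda", "red"), ("honda", "blue"),
%%%             ("toyota", "red"), ("ford", "green")]
%%%   weights = {v: 1.0 for row in tuples for v in row}
%%%
%%%   for _ in range(20):
%%%       new = {v: 0.0 for v in weights}
%%%       for row in tuples:
%%%           for v in row:
%%%               new[v] += sum(weights[u] for u in row if u != v)
%%%       norm = math.sqrt(sum(w * w for w in new.values()))
%%%       weights = {v: w / norm for v, w in new.items()}
%%%
%%%   # strongly co-occurring values converge to larger weights
%%%   print(sorted(weights.items(), key=lambda kv: -kv[1]))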

@Article{Knorr:2000:DBO,
  author =       "Edwin M. Knorr and Raymond T. Ng and Vladimir
                 Tucakov",
  title =        "Distance-Based Outliers: Algorithms and Applications",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "237--253",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Knorr:Edwin_M=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/n/Ng:Raymond_T=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tucakov:V=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080237.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080237.pdf",
  abstract =     "This paper deals with finding outliers (exceptions) in
                 large, multidimensional datasets. The identification of
                 outliers can lead to the discovery of truly unexpected
                 knowledge in areas such as electronic commerce, credit
                 card fraud, and even the analysis of performance
                 statistics of professional athletes. Existing methods
                 that we have seen for finding outliers can only deal
                 efficiently with two dimensions/attributes of a
                 dataset. In this paper, we study the notion of {\em
                 DB\/} ({\em distance-based\/}) outliers. Specifically,
                 we show that (i) outlier detection can be done {\em
                 efficiently\/} for {\em large\/} datasets, and for
                 $k$-dimensional datasets with large values of $k$
                 (e.g., $k \ge 5$); and (ii), outlier detection is a
                 {\em meaningful\/} and important knowledge discovery
                 task. First, we present two simple algorithms, both
                 having a complexity of $O(k \: N^2)$, $k$ being the
                 dimensionality and $N$ being the number of objects in
                 the dataset. These algorithms readily support datasets
                 with many more than two attributes. Second, we present
                 an optimized cell-based algorithm that has a complexity
                 that is linear with respect to $N$, but exponential
                 with respect to $k$. We provide experimental results
                 indicating that this algorithm significantly
                 outperforms the two simple algorithms for $k \leq 4$.
                 Third, for datasets that are mainly disk-resident, we
                 present another version of the cell-based algorithm
                 that guarantees at most three passes over a dataset.
                 Again, experimental results show that this algorithm is
                 by far the best for $k \leq 4$. Finally, we discuss our
                 work on three real-life applications, including one on
                 spatio-temporal data (e.g., a video surveillance
                 application), in order to confirm the relevance and
                 broad applicability of {\em DB\/} outliers.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "algorithms; data mining; data mining applications;
                 outliers\slash exceptions",
}
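
%%% A minimal Python sketch of the simple O(kN^2) nested-loop test
%%% implied by the abstract's definition of DB(p, D)-outliers: an
%%% object is an outlier if at least a fraction p of the dataset lies
%%% farther than distance D from it.  Data and parameters invented:
%%%
%%%   import math
%%%
%%%   def is_db_outlier(obj, data, p, D):
%%%       far = sum(1 for o in data if math.dist(obj, o) > D)
%%%       return far >= p * len(data)
%%%
%%%   data = [(0, 0), (1, 0), (0, 1), (1, 1), (10, 10)]
%%%   print([o for o in data if is_db_outlier(o, data, p=0.7, D=3.0)])
%%%   # only (10, 10) is far from at least 70% of the objects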

@Article{Korn:2000:QDM,
  author =       "Flip Korn and Alexandros Labrinidis and Yannis Kotidis
                 and Christos Faloutsos",
  title =        "Quantifiable Data Mining Using Ratio Rules",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "254--266",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Faloutsos:Christos.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Korn:Flip.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/k/Kotidis:Yannis.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Labrinidis:Alexandros.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080254.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080254.pdf",
  abstract =     "Association Rule Mining algorithms operate on a data
                 matrix (e.g., customers $\times$ products) to derive
                 association rules [AIS93b, SA96]. We propose a new
                 paradigm, namely, {\em Ratio Rules}, which are
                 quantifiable in that we can measure the ``goodness'' of
                 a set of discovered rules. We also propose the
                 ``guessing error'' as a measure of the ``goodness'',
                 that is, the root-mean-square error of the
                 reconstructed values of the cells of the given matrix,
                 when we pretend that they are unknown. Another
                 contribution is a novel method to guess missing/hidden
                 values from the Ratio Rules that our method derives.
                 For example, if somebody bought \$10 of milk and \$3 of
                 bread, our rules can ``guess'' the amount spent on
                 butter. Thus, unlike association rules, Ratio Rules can
                 perform a variety of important tasks such as
                 forecasting, answering ``what-if'' scenarios, detecting
                 outliers, and visualizing the data. Moreover, we show
                 that we can compute Ratio Rules in a {\em single\/}
                 pass over the data set with small memory requirements
                 (a few small matrices), in contrast to association rule
                 mining methods which require multiple passes and/or
                 large memory. Experiments on several real data sets
                 (e.g., basketball and baseball statistics, biological
                 data) demonstrate that the proposed method: (a) leads
                 to rules that make sense; (b) can find large itemsets
                 in binary matrices, even in the presence of noise; and
                 (c) consistently achieves a ``guessing error'' of up to
                 5 times less than using straightforward column
                 averages.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data mining; forecasting; guessing error; knowledge
                 discovery",
}
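
%%% A hypothetical sketch (using numpy, not the paper's code) of the
%%% Ratio Rule idea: the dominant singular vector of the data matrix
%%% gives the spending ratios, and those ratios let us guess a hidden
%%% cell from a known one.  The matrix below is invented:
%%%
%%%   import numpy as np
%%%
%%%   X = np.array([[10.0, 3.0, 5.0],    # rows: customers
%%%                 [20.0, 6.2, 9.8],    # cols: milk, bread, butter
%%%                 [ 5.0, 1.4, 2.6]])
%%%   _, _, vt = np.linalg.svd(X, full_matrices=False)
%%%   rule = vt[0]                       # dominant ratio rule
%%%   print("milk:bread:butter =", rule / rule[0])
%%%
%%%   milk = 10.0                        # known purchase
%%%   print("guessed butter:", milk * rule[2] / rule[0])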

@Article{Torp:2000:ETD,
  author =       "Kristian Torp and Christian S. Jensen and Richard
                 Thomas Snodgrass",
  title =        "Effective Timestamping in Databases",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "267--288",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/j/Jensen:Christian_S=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Snodgrass:Richard_T=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Torp:Kristian.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080267.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080267.pdf",
  abstract =     "Many existing database applications place various
                 timestamps on their data, rendering temporal values
                 such as dates and times prevalent in database tables.
                 During the past two decades, several dozen temporal
                 data models have appeared, all with timestamps being
                 integral components. The models have used timestamps
                 for encoding two specific temporal aspects of database
                 facts, namely transaction time, when the facts are
                 current in the database, and valid time, when the facts
                 are true in the modeled reality. However, with few
                 exceptions, the assignment of timestamp values has been
                 considered only in the context of individual
                 modification statements. This paper takes the next
                 logical step: It considers the use of timestamping for
                 capturing transaction and valid time in the context of
                 transactions. The paper initially identifies and
                 analyzes several problems with straightforward
                 timestamping, then proceeds to propose a variety of
                 techniques aimed at solving these problems.
                 Timestamping the results of a transaction with the
                 commit time of the transaction is a promising approach.
                 The paper studies how this timestamping may be done
                 using a spectrum of techniques. While many database
                 facts are valid until {\em now}, the current time, this
                 value is absent from the existing temporal types.
                 Techniques that address this problem using different
                 substitute values are presented. Using a stratum
                 architecture, the performance of the different proposed
                 techniques is studied. Although querying and modifying
                 time-varying data is accompanied by a number of subtle
                 problems, we present a comprehensive approach that
                 provides application programmers with simple,
                 consistent, and efficient support for modifying
                 bitemporal databases in the context of user
                 transactions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "timestamping; transactions",
}

@Article{Sheikholeslami:2000:WWB,
  author =       "Gholamhosein Sheikholeslami and Surojit Chatterjee and
                 Aidong Zhang",
  title =        "{WaveCluster}: a Wavelet Based Clustering Approach
                 for Spatial Data in Very Large Databases",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "289--304",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chatterjee:Surojit.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sheikholeslami:Gholamhosein.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zhang:Aidong.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080289.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080289.pdf",
  abstract =     "Many applications require the management of spatial
                 data in a multidimensional feature space. Clustering
                 large spatial databases is an important problem, which
                 tries to find the densely populated regions in the
                 feature space to be used in data mining, knowledge
                 discovery, or efficient information retrieval. A good
                 clustering approach should be efficient and detect
                 clusters of arbitrary shape. It must be insensitive to
                 the noise (outliers) and the order of input data. We
                 propose {\em WaveCluster}, a novel clustering approach
                 based on wavelet transforms, which satisfies all the
                 above requirements. Using the multiresolution property
                 of wavelet transforms, we can effectively identify
                 arbitrarily shaped clusters at different degrees of
                 detail. We also demonstrate that {\em WaveCluster\/} is
                 highly efficient in terms of time complexity.
                 Experimental results on very large datasets are
                 presented, which show the efficiency and effectiveness
                 of the proposed approach compared to the other recent
                 clustering methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
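
%%% A hypothetical Python sketch of the WaveCluster pipeline on a toy
%%% 2-D dataset: quantize points into grid cells, smooth with a
%%% Haar-style averaging (the wavelet approximation band), keep the
%%% dense cells, and connect them into clusters.  It assumes numpy;
%%% grid size and threshold are invented:
%%%
%%%   import numpy as np
%%%   from collections import deque
%%%
%%%   pts = np.vstack([np.random.randn(100, 2),
%%%                    np.random.randn(80, 2) + 6])
%%%   grid, _, _ = np.histogram2d(pts[:, 0], pts[:, 1], bins=16)
%%%   # Haar approximation band: average non-overlapping 2x2 blocks
%%%   approx = grid.reshape(8, 2, 8, 2).mean(axis=(1, 3))
%%%   dense = approx > approx.mean()
%%%
%%%   labels, nxt = -np.ones(dense.shape, int), 0
%%%   for i, j in zip(*np.nonzero(dense)):     # BFS labeling
%%%       if labels[i, j] >= 0:
%%%           continue
%%%       q = deque([(i, j)]); labels[i, j] = nxt
%%%       while q:
%%%           a, b = q.popleft()
%%%           for da, db in ((1, 0), (-1, 0), (0, 1), (0, -1)):
%%%               x, y = a + da, b + db
%%%               if (0 <= x < 8 and 0 <= y < 8 and dense[x, y]
%%%                       and labels[x, y] < 0):
%%%                   labels[x, y] = nxt; q.append((x, y))
%%%       nxt += 1
%%%   print("clusters found:", nxt)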

@Article{Pacitti:2000:UPS,
  author =       "Esther Pacitti and Eric Simon",
  title =        "Update Propagation Strategies to Improve Freshness in
                 Lazy Master Replicated Databases",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "305--318",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Pacitti:Esther.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Simon:Eric.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080305.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080305.pdf",
  abstract =     "Many distributed database applications need to
                 replicate data to improve data availability and query
                 response time. The two-phase commit protocol guarantees
                 mutual consistency of replicated data but does not
                 provide good performance. Lazy replication has been
                 used as an alternative solution in several types of
                 applications such as on-line financial transactions and
                 telecommunication systems. In this case, mutual
                 consistency is relaxed and the concept of freshness is
                 used to measure the deviation between replica copies.
                 In this paper, we propose two update propagation
                 strategies that improve freshness. Both of them use
                 immediate propagation: updates to a primary copy are
                 propagated towards a slave node as soon as they are
                 detected at the master node without waiting for the
                 commitment of the update transaction. Our performance
                 study shows that our strategies can improve data
                 freshness by up to five times compared with the
                 deferred approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data replication; distributed databases; performance
                 evaluation",
}

@Article{Liang:2000:OMD,
  author =       "Weifa Liang and Maria E. Orlowska and Jeffrey X. Yu",
  title =        "Optimizing Multiple Dimensional Queries Simultaneously
                 in Multidimensional Databases",
  journal =      j-VLDB-J,
  volume =       "8",
  number =       "3--4",
  pages =        "319--338",
  month =        feb,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:51 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb8.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0008003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Liang:Weifa.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/o/Orlowska:Maria_E=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/y/Yu:Jeffrey_X=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0008003/00080319.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0008003/00080319.pdf",
  abstract =     "Some significant progress related to multidimensional
                 data analysis has been achieved in the past few years,
                 including the design of fast algorithms for computing
                 datacubes, selecting some precomputed group-bys to
                 materialize, and designing efficient storage structures
                 for multidimensional data. However, little work has
                 been carried out on multidimensional query optimization
                 issues. In particular, the response time (or evaluation
                 cost) for answering several related dimensional queries
                 simultaneously is crucial to OLAP applications.
                 Recently, Zhao et al. first explored this problem by
                 presenting three heuristic algorithms. In this paper, we
                 first consider in detail two cases of the problem in
                 which all the queries are either hash-based star joins
                 or index-based star joins only. In the case of the
                 hash-based star join, we devise a polynomial
                 approximation algorithm which delivers a plan whose
                 evaluation cost is $O(n^\epsilon)$ times the optimal,
                 where $n$ is the number of queries and $\epsilon$ is a
                 fixed constant with $0 < \epsilon \leq 1$. We also
                 present an exponential algorithm which delivers a plan
                 with the optimal evaluation cost. In the case of the
                 index-based star join, we present a heuristic algorithm
                 which delivers a plan whose evaluation cost is $n$
                 times the optimal, and an exponential algorithm which
                 delivers a plan with the optimal evaluation cost. We
                 then consider a general case in which both hash-based
                 star-join and index-based star-join queries are
                 included. For this case, we give a possible improvement
                 on the work of Zhao et al., based on an analysis of
                 their solutions. We also develop another heuristic and
                 an exact algorithm for the problem. We finally conduct
                 a performance study by implementing our algorithms. The
                 experimental results demonstrate that the solutions
                 delivered for the restricted cases are always within
                 two times of the optimal, which confirms our
                 theoretical upper bounds. Actually these experiments
                 produce much better results than our theoretical
                 estimates. To the best of our knowledge, this is the
                 only development of polynomial algorithms for the first
                 two cases which are able to deliver plans with
                 deterministic performance guarantees in terms of the
                 qualities of the plans generated. The previous
                 approaches including that of [ZDNS98] may generate a
                 feasible plan for the problem in these two cases, but
                 they do not provide any performance guarantee, i.e.,
                 the plans generated by their algorithms can be
                 arbitrarily far from the optimal one.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data warehousing; MDDBs; multiple dimensional query
                 optimization; OLAP; query modeling",
}

@Article{Atzeni:2000:DWG,
  author =       "Paolo Atzeni and Alberto O. Mendelzon",
  title =        "Databases and the {Web}: Guest Editorial: Databases
                 and the {Web}",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "1",
  pages =        "1--1",
  month =        mar,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 27 10:11:55 MDT 2000",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/a/Atzeni:Paolo.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mendelzon:Alberto_O=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0009001/00090001.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009001/00090001.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Atzeni:2000:GE,
  author =       "Paolo Atzeni and Alberto O. Mendelzon",
  title =        "Guest editorial",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "1",
  pages =        "1--1",
  month =        mar,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:52 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chidlovskii:2000:SCW,
  author =       "Boris Chidlovskii and Uwe M. Borghoff",
  title =        "Semantic caching of {Web} queries",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "1",
  pages =        "2--17",
  month =        mar,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:52 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Borghoff:Uwe_M=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chidlovskii:Boris.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0009001/00090002.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009001/00090002.pdf",
  abstract =     "In meta-searchers accessing distributed Web-based
                 information repositories, performance is a major issue.
                 Efficient query processing requires an appropriate
                 caching mechanism. Unfortunately, standard page-based
                 as well as tuple-based caching mechanisms designed for
                 conventional databases are not efficient on the Web,
                 where keyword-based querying is often the only way to
                 retrieve data. In this work, we study the problem of
                 semantic caching of Web queries and develop a caching
                 mechanism for conjunctive Web queries based on {\em
                 signature files}. Our algorithms cope with both
                 relations of semantic containment and intersection
                 between a query and the corresponding cache items. We
                 also develop the cache replacement strategy to treat
                 situations when cached items differ in size and
                 contribution when providing partial query answers. We
                 report results of experiments and show how the caching
                 mechanism is realized in the Knowledge Broker system.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "experiments; query algorithms; region containment;
                 semantic caching; signature files",
}
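
%%% A hypothetical Python sketch of signature files for conjunctive
%%% Web queries as described above: each query's terms are
%%% superimposed into a bit signature, and a cached query can answer a
%%% new one when its term set is contained in the new query's terms, a
%%% test the signatures approximate cheaply (false positives are
%%% removed by the exact set check).  All names are invented:
%%%
%%%   def signature(terms, bits=64):
%%%       sig = 0
%%%       for t in terms:
%%%           sig |= 1 << (hash(t) % bits)
%%%       return sig
%%%
%%%   cache = {}    # signature -> (terms, cached answer)
%%%
%%%   def lookup(query_terms):
%%%       qsig = signature(query_terms)
%%%       for csig, (cterms, answer) in cache.items():
%%%           # csig subset of qsig: every cached-term bit set in qsig
%%%           if csig & qsig == csig and cterms <= set(query_terms):
%%%               return answer   # containment hit: refine this answer
%%%       return None
%%%
%%%   key = signature({"very", "large"})
%%%   cache[key] = ({"very", "large"}, ["doc1", "doc7"])
%%%   print(lookup({"very", "large", "databases"}))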

@Article{Gruser:2000:LRT,
  author =       "Jean-Robert Gruser and Louiqa Raschid and Vladimir
                 Zadorozhny and Tao Zhan",
  title =        "Learning response time for {WebSources} using query
                 feedback and application in query optimization",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "1",
  pages =        "18--37",
  month =        mar,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:52 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/g/Gruser:Jean=Robert.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/r/Raschid:Louiqa.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zadorozhny:Vladimir.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/z/Zhan:Tao.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0009001/00090018.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009001/00090018.pdf",
  abstract =     "The rapid growth of the Internet and support for
                 interoperability protocols has increased the number of
                 Web accessible sources, WebSources. Current wrapper
                 mediator architectures need to be extended with a
                 wrapper cost model (WCM) for WebSources that can
                 estimate the response time (delays) to access sources
                 as well as other relevant statistics. In this paper, we
                 present a Web prediction tool (WebPT), a tool that is
                 based on learning using query feedback from WebSources.
                 The WebPT uses dimensions time of day, day, and
                 quantity of data, to learn response times from a
                 particular WebSource, and to predict the expected
                 response time (delay) for some query. Experiment data
                 was collected from several sources, and those
                 dimensions that were significant in estimating the
                 response time were determined. We then trained the
                 WebPT on the collected data, to use the three
                 dimensions mentioned above, and to predict the response
                 time, as well as a confidence in the prediction. We
                 describe the WebPT learning algorithms, and report on
                 the WebPT learning for WebSources. Our research shows
                 that we can improve the quality of learning by tuning
                 the WebPT features, e.g., training the WebPT using a
                 logarithm of the input training data; including
                 significant dimensions in the WebPT; or changing the
                 ordering of dimensions. A comparison of the WebPT with
                 more traditional neural network (NN) learning has been
                 performed, and we briefly report on the comparison. We
                 then demonstrate how the WebPT prediction of delay may
                 be used by a scrambling enabled optimizer. A scrambling
                 algorithm identifies some critical points of delay,
                 where it makes a decision to scramble (modify) a plan,
                 to attempt to hide the expected delay by computing some
                 other part of the plan that is unaffected by the delay.
                 We explore the space of real delay at a WebSource,
                 versus the WebPT prediction of this delay, with respect
                 to critical points of delay in specific plans. We
                 identify those cases where WebPT overestimation or
                 underestimation of the real delay results in a penalty
                 in the scrambling enabled optimizer, and those cases
                 where there is no penalty. Using the experimental data
                 and WebPT learning, we test how good the WebPT is in
                 minimizing these penalties.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data-intensive applications on the Web; query
                 languages and systems for Web data",
}

@Article{Fernandez:2000:DSW,
  author =       "Mary Fern{\'a}ndez and Daniela Florescu and Alon Levy
                 and Dan Suciu",
  title =        "Declarative specification of {Web} sites with {S}",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "1",
  pages =        "38--55",
  month =        mar,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:52 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fernandez:Mary_F=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Florescu:Daniela.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Levy:Alon_Y=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Suciu:Dan.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0009001/00090038.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009001/00090038.pdf",
  abstract =     "S is a system for implementing {\em data-intensive\/}
                 Web sites, which typically integrate information from
                 multiple data sources and have complex structure. S's
                 key idea is separating the management of a Web site's
                 data, the specification of its content and structure,
                 and the visual representation of its pages. S provides
                 a declarative {\em query language\/} for specifying a
                 site's content and structure, and a simple {\em
                 template language\/} for specifying a site's HTML
                 representation. This paper contains a comprehensive
                 description of the S system and details the benefits of
                 declarative site specification. We describe our
                 experiences using S in a production application and
                 describe three different, but complementary, systems
                 that extend and improve upon S's original ideas.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "declarative query languages; web-site management",
  xxauthor =     "Mary F. Fernandez and Daniela Florescu and Alon Y.
                 Levy and Dan Suciu",
  xxtitle =      "Declarative Specification of {Web} Sites with
                 {Strudel}",
}

@Article{Berendt:2000:ANB,
  author =       "Bettina Berendt and Myra Spiliopoulou",
  title =        "Analysis of navigation behaviour in {Web} sites
                 integrating multiple information systems",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "1",
  pages =        "56--75",
  month =        mar,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:52 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Berendt:Bettina.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Spiliopoulou:Myra.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0009001/00090056.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009001/00090056.pdf",
  abstract =     "The analysis of web usage has mostly focused on sites
                 composed of conventional static pages. However, huge
                 amounts of information available in the web come from
                 databases or other data collections and are presented
                 to the users in the form of dynamically generated
                 pages. The query interfaces of such sites allow the
                 specification of many search criteria. Their generated
                 results support navigation to pages of results
                 combining cross-linked data from many sources. For the
                 analysis of visitor navigation behaviour in such web
                 sites, we propose the web usage miner (WUM), which
                 discovers navigation patterns subject to advanced
                 statistical and structural constraints. Since our
                 objective is the discovery of interesting navigation
                 patterns, we do not focus on accesses to individual
                 pages. Instead, we construct conceptual hierarchies
                 that reflect the query capabilities used in the
                 production of those pages. Our experiments with a real
                 web site that integrates data from multiple databases,
                 the German SchulWeb, demonstrate the appropriateness of
                 WUM in discovering navigation patterns and show how
                 those discoveries can help in assessing and improving
                 the quality of the site.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "conceptual hierarchies; data mining; query
                 capabilities; Web databases; Web query interfaces; Web
                 usage mining",
}

@Article{Buneman:2000:UQL,
  author =       "Peter Buneman and Mary F. Fernandez and Dan Suciu",
  title =        "{UnQL}: a query language and algebra for
                 semistructured data based on structural recursion",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "1",
  pages =        "76--110",
  month =        mar,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:52 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0009001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/b/Buneman:Peter.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fernandez:Mary_F=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Suciu:Dan.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0009001/00090076.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009001/00090076.pdf",
  abstract =     "This paper presents structural recursion as the basis
                 of the syntax and semantics of query languages for
                 semistructured data and XML. We describe a simple and
                 powerful query language based on pattern matching and
                 show that it can be expressed using structural
                 recursion, which is introduced as a top-down, recursive
                 function, similar to the way XSL is defined on XML
                 trees. On cyclic data, structural recursion can be
                 defined in two equivalent ways: as a recursive function
                 which evaluates the data top-down and remembers all its
                 calls to avoid infinite loops, or as a bulk evaluation
                 which processes the entire data in parallel using only
                 traditional relational algebra operators. The latter
                 makes it possible for optimization techniques in
                 relational queries to be applied to structural
                 recursion. We show that the composition of two
                 structural recursion queries can be expressed as a
                 single such query, and this is used as the basis of an
                 optimization method for mediator systems. Several other
                 formal properties are established: structural recursion
                 can be expressed in first-order logic extended with
                 transitive closure; its data complexity is PTIME; and
                 over relational data it is a conservative extension of
                 the relational calculus. The underlying data model is
                 based on value equality, formally defined with
                 bisimulation. Structural recursion is shown to be
                 invariant with respect to value equality.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "optimization; query language; semistructured data;
                 structural recursion; XML; XSL",
}
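
%%% A hypothetical Python sketch, in the spirit of the abstract rather
%%% than UnQL's syntax, of structural recursion as a top-down function
%%% that remembers its calls so cyclic data terminates: it renames
%%% every edge labeled "tel" to "phone" while recursing into subtrees.
%%% The data and labels are invented:
%%%
%%%   def rewrite(node, seen=None):
%%%       seen = {} if seen is None else seen
%%%       if id(node) in seen:       # remembered call: cycles are safe
%%%           return seen[id(node)]
%%%       out = {}
%%%       seen[id(node)] = out
%%%       for label, child in node.items():
%%%           new_label = "phone" if label == "tel" else label
%%%           out[new_label] = (rewrite(child, seen)
%%%                             if isinstance(child, dict) else child)
%%%       return out
%%%
%%%   db = {"person": {"name": "Ada", "tel": "555-0100",
%%%                    "friend": {"tel": "555-0199"}}}
%%%   print(rewrite(db))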

@Article{Mirbel:2000:CTI,
  author =       "Isabelle Mirbel and Barbara Pernici and Timos K.
                 Sellis and S. Tserkezoglou and Michalis Vazirgiannis",
  title =        "Checking the Temporal Integrity of Interactive
                 Multimedia Documents",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "2",
  pages =        "111--130",
  month =        jul,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:53 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0009002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Mirbel:Isabelle.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/p/Pernici:Barbara.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Sellis:Timos_K=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/t/Tserkezoglou:S=.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/v/Vazirgiannis:Michalis.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0009002/00090111.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009002/00090111.pdf",
  abstract =     "When authoring multimedia scenarios, and in particular
                 scenarios with user interaction, where the sequence and
                 time of occurrence of interactions is not predefined,
                 it is difficult to guarantee the consistency of the
                 resulting scenarios. As a consequence, the {\em
                 execution\/} of the scenario may result in unexpected
                 behavior or inconsistent use of media. The present
                 paper proposes a methodology for checking the temporal
                 integrity of interactive multimedia document (IMD)
                 scenarios at authoring time at various levels. The IMD
                 flow is mainly defined by the events occurring during
                 the IMD session. Integrity checking consists of a set
                 of discrete steps, during which we transform the
                 scenario into temporal constraint networks representing
                 the constraints linking the different possible events
                 in the scenario. Temporal constraint verification
                 techniques are applied to verify the integrity of the
                 scenario, deriving a minimal network, showing possible
                 temporal relationships between events given a set of
                 constraints.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "constraint networks; multimedia presentation; temporal
                 integrity",
}
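
%%% A hypothetical Python sketch of one step the abstract describes:
%%% deriving a minimal network from temporal difference constraints.
%%% Edges carry upper bounds on t[j] - t[i]; Floyd--Warshall tightens
%%% them, and a negative self-distance signals an inconsistent
%%% scenario.  Events and bounds are invented:
%%%
%%%   INF = float("inf")
%%%   events = ["start", "videoA", "click", "end"]
%%%   n = len(events)
%%%   ub = [[0 if i == j else INF for j in range(n)]
%%%         for i in range(n)]
%%%
%%%   def add(i, j, lo, hi):      # constraint: lo <= t[j]-t[i] <= hi
%%%       ub[i][j] = min(ub[i][j], hi)
%%%       ub[j][i] = min(ub[j][i], -lo)
%%%
%%%   add(0, 1, 0, 5)             # videoA starts within 5s of start
%%%   add(1, 2, 2, 10)            # click 2..10s after videoA
%%%   add(0, 3, 0, 8)             # the session lasts at most 8s
%%%
%%%   for k in range(n):
%%%       for i in range(n):
%%%           for j in range(n):
%%%               ub[i][j] = min(ub[i][j], ub[i][k] + ub[k][j])
%%%
%%%   print("consistent:", all(ub[i][i] >= 0 for i in range(n)))
%%%   # ub now holds the minimal pairwise bounds between events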

@Article{Candan:2000:VMM,
  author =       "K. Sel{\c{c}}uk Candan and Eric Lemar and V. S.
                 Subrahmanian",
  title =        "View management in multimedia databases",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "2",
  pages =        "131--153",
  month =        jul,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:53 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0009002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Candan:K=_Sel=ccedil=uk.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/l/Lemar:Eric.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/s/Subrahmanian:V=_S=.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0009002/00090131.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009002/00090131.pdf",
  abstract =     "Though there has been extensive work on multimedia
                 databases in the last few years, there is no prevailing
                 notion of a multimedia view, nor are there techniques
                 to create, manage, and maintain such views. Visualizing
                 the results of a dynamic multimedia query or
                 materializing a dynamic multimedia view corresponds to
                 assembling and delivering an interactive multimedia
                 presentation in accordance with the visualization
                 specifications. In this paper, we suggest that a
                 non-interactive multimedia presentation is a set of
                 {\em virtual objects\/} with associated spatial and
                 temporal presentation constraints. A virtual object is
                 either an object, or the result of a query. As queries
                 may have different answers at different points in time,
                 scheduling the presentation of such objects is
                 nontrivial. We then develop a probabilistic model of
                 interactive multimedia presentations, extending the
                 non-interactive model described earlier. We also
                 develop a probabilistic model of interactive
                 visualization where the probabilities reflect the user
                 profiles, or the likelihood of certain user
                 interactions. Based on this probabilistic model, we
                 develop three types of utility-theoretic prefetching
                 algorithms that anticipate how users will
                 interact with the presentation. These prefetching
                 algorithms allow efficient visualization of the query
                 results in accordance with the underlying
                 specification. We have built a prototype system that
                 incorporates these algorithms. We report on the results
                 of experiments conducted on top of this
                 implementation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "interactivity; multimedia databases; prefetching;
                 result visualization\slash presentation; view
                 management",
}
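
%%% The entry above bases prefetching on a probabilistic model of user
%%% interactions.  A minimal sketch of the underlying utility-theoretic
%%% ranking idea, not the authors' three algorithms; the object names,
%%% probabilities, and costs below are hypothetical (Python):
%%%
%%%     def rank_prefetch(candidates, prob, benefit, cost):
%%%         # Fetch objects with the best expected benefit per unit of
%%%         # fetch cost first; prob() reflects the user profile.
%%%         return sorted(candidates,
%%%                       key=lambda o: prob(o) * benefit(o) / cost(o),
%%%                       reverse=True)
%%%
%%%     objs = ["intro.mpg", "map.gif", "score.mid"]
%%%     p = {"intro.mpg": 0.7, "map.gif": 0.2, "score.mid": 0.1}
%%%     c = {"intro.mpg": 4.0, "map.gif": 1.0, "score.mid": 1.0}
%%%     plan = rank_prefetch(objs, p.get, lambda o: 1.0, c.get)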

@Article{Fu:2000:DVT,
  author =       "Ada Wai-chee Fu and Polly Mei-shuen Chan and Yin-Ling
                 Cheung and Yiu Sang Moon",
  title =        "Dynamic vp-Tree Indexing for $n$-Nearest Neighbor
                 Search Given Pair-Wise Distances",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "2",
  pages =        "154--173",
  month =        jul,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:53 MDT 2008",
  bibsource =    "http://ftp.informatik.rwth-aachen.de/dblp/db/journals/vldb/vldb9.html;
                 http://link.springer.de/link/service/journals/00778/tocs/t0009002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Electronic edition.",
  URL =          "http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Chan:Polly_Mei=shuen.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/c/Cheung:Yin=Ling.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/f/Fu:Ada_Wai=Chee.html;
                 http://ftp.informatik.rwth-aachen.de/dblp/db/indices/a-tree/m/Moon:Yiu_Sang.html;
                 http://link.springer.de/link/service/journals/00778/bibs/0009002/00090154.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009002/00090154.pdf",
  abstract =     "For some multimedia applications, it has been found
                 that domain objects cannot be represented as feature
                 vectors in a multidimensional space. Instead, pair-wise
                 distances between data objects are the only input. To
                 support content-based retrieval, one approach maps each
                 object to a $k$-dimensional ($k$-d) point and tries to
                 preserve the distances among the points. Then, existing
                 spatial access index methods such as the R-trees and
                 KD-trees can support fast searching on the resulting
                 $k$-d points. However, information loss is inevitable
                 with such an approach since the distances between data
                 objects can only be preserved to a certain extent. Here
                 we investigate the use of a distance-based indexing
                 method. In particular, we apply the vantage point tree
                 (vp-tree) method. There are two important problems for
                 the vp-tree method that warrant further investigation:
                 the $n$-nearest neighbor search and the updating
                 mechanisms. We study an $n$-nearest neighbor search
                 algorithm for the vp-tree, which is shown by
                 experiments to scale up well with the size of the
                 dataset and the desired number of nearest neighbors,
                 $n$. Experiments also show that the searching in the
                 vp-tree is more efficient than that for the $R^*$-tree
                 and the $M$-tree. Next, we propose solutions for the
                 update problem for the vp-tree, and show by experiments
                 that the algorithms are efficient and effective.
                 Finally, we investigate the problem of selecting
                 vantage points, propose a few alternative methods, and
                 study their impact on the number of distance
                 computations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "content-based retrieval; indexing; nearest neighbor
                 search; pair-wise distances; updating",
}
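
%%% As an illustration of the structure discussed above, here is a
%%% compact vantage-point tree with an $n$-nearest-neighbor search; it
%%% is a generic textbook sketch, not the authors' tuned algorithms,
%%% and the naive vantage-point choice (the first point) is only one of
%%% the alternatives the paper studies (Python):
%%%
%%%     import heapq
%%%
%%%     def build_vp_tree(points, dist):
%%%         # Node: (vantage, mu, inside, outside); mu is the median
%%%         # distance, splitting the remaining points into two balls.
%%%         if not points:
%%%             return None
%%%         vantage, rest = points[0], points[1:]
%%%         if not rest:
%%%             return (vantage, 0.0, None, None)
%%%         ds = sorted(dist(vantage, x) for x in rest)
%%%         mu = ds[len(ds) // 2]
%%%         inside = [x for x in rest if dist(vantage, x) <= mu]
%%%         outside = [x for x in rest if dist(vantage, x) > mu]
%%%         return (vantage, mu,
%%%                 build_vp_tree(inside, dist),
%%%                 build_vp_tree(outside, dist))
%%%
%%%     def nn_search(node, q, n, dist, heap=None):
%%%         # heap holds (-distance, point): a bounded max-heap of the
%%%         # n best candidates seen so far.
%%%         if heap is None:
%%%             heap = []
%%%         if node is None:
%%%             return heap
%%%         vantage, mu, inside, outside = node
%%%         d = dist(q, vantage)
%%%         if len(heap) < n:
%%%             heapq.heappush(heap, (-d, vantage))
%%%         elif d < -heap[0][0]:
%%%             heapq.heapreplace(heap, (-d, vantage))
%%%         # Search the more promising side first; the triangle
%%%         # inequality prunes the far side unless the query ball of
%%%         # radius tau (current n-th best) straddles the boundary.
%%%         near, far = (inside, outside) if d <= mu else (outside, inside)
%%%         nn_search(near, q, n, dist, heap)
%%%         tau = -heap[0][0]
%%%         if len(heap) < n or abs(d - mu) <= tau:
%%%             nn_search(far, q, n, dist, heap)
%%%         return heap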

@Article{Atkinson:2000:GE,
  author =       "Malcolm P. Atkinson",
  title =        "Guest editorial",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "3",
  pages =        "175--176",
  month =        dec,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:54 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090175.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009003/00090175.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bernstein:2000:CBP,
  author =       "Philip A. Bernstein and Shankar Pal and David Shutt",
  title =        "Context-based prefetch --- an optimization for
                 implementing objects on relations",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "3",
  pages =        "177--189",
  month =        dec,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:54 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090177.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009003/00090177.pdf",
  abstract =     "When implementing persistent objects on a relational
                 database, a major performance issue is prefetching data
                 to minimize the number of round-trips to the database.
                 This is especially hard with navigational applications,
                 since future accesses are unpredictable. We propose the
                 use of the context in which an object is loaded as a
                 predictor of future accesses, where a context can be a
                 stored collection of relationships, a query result, or
                 a complex object. When an object O's state is loaded,
                 similar state for other objects in O's context is
                 prefetched. We present a design for maintaining context
                 and for using it to guide prefetch. We give performance
                 measurements of its implementation in Microsoft
                 Repository, showing up to a 70\% reduction in running
                 time. We describe several variations of the
                 optimization: selectively applying the technique based
                 on application and database characteristics, using
                 application-supplied performance hints, using
                 concurrent database queries to support asynchronous
                 prefetch, prefetching across relationship paths, and
                 delayed prefetch to save database round-trips.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "caching; object-oriented database; object-relational
                 mapping; prefetch",
}
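
%%% A minimal sketch of the context idea described above: when one
%%% object in a context (here, a stored collection) is loaded, the same
%%% columns are prefetched for its context siblings in a single
%%% round-trip.  The schema and column names are hypothetical, and this
%%% is far simpler than the Microsoft Repository implementation
%%% (Python):
%%%
%%%     import sqlite3
%%%
%%%     class ContextCache:
%%%         def __init__(self, conn):
%%%             self.conn = conn
%%%             self.cache = {}      # (table, oid) -> row
%%%             self.context = {}    # oid -> sibling oids
%%%
%%%         def set_context(self, oids):
%%%             for oid in oids:
%%%                 self.context[oid] = list(oids)
%%%
%%%         def load(self, table, oid):
%%%             if (table, oid) not in self.cache:
%%%                 sibs = self.context.get(oid, [oid])
%%%                 marks = ",".join("?" * len(sibs))
%%%                 rows = self.conn.execute(
%%%                     "SELECT id, name, price FROM %s WHERE id IN (%s)"
%%%                     % (table, marks), sibs).fetchall()
%%%                 for row in rows:   # one round-trip fills many slots
%%%                     self.cache[(table, row[0])] = row
%%%             return self.cache[(table, oid)]
%%%
%%%     conn = sqlite3.connect(":memory:")
%%%     conn.execute("CREATE TABLE parts (id INTEGER, name TEXT, price REAL)")
%%%     conn.executemany("INSERT INTO parts VALUES (?, ?, ?)",
%%%                      [(i, "p%d" % i, float(i)) for i in range(10)])
%%%     cache = ContextCache(conn)
%%%     cache.set_context([1, 2, 3])   # e.g., a query-result collection
%%%     row = cache.load("parts", 1)   # also caches objects 2 and 3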

@Article{Claussen:2000:EES,
  author =       "J. Claussen and A. Kemper and D. Kossmann and C.
                 Wiesner",
  title =        "Exploiting early sorting and early partitioning for
                 decision support query processing",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "3",
  pages =        "190--213",
  month =        dec,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:54 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090190.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009003/00090190.pdf",
  abstract =     "Decision support queries typically involve several
                 joins, a grouping with aggregation, and/or sorting of
                 the result tuples. We propose two new classes of query
                 evaluation algorithms that can be used to speed up the
                 execution of such queries. The algorithms are based on
                 (1) {\em early sorting\/} and (2) {\em early
                 partitioning\/} --- or a combination of both. The idea
                 is to push the sorting and/or the partitioning to the
                 leaves, i.e., the base relations, of the query
                 evaluation plans (QEPs) and thereby avoid sorting or
                 partitioning large intermediate results generated by
                 the joins. Both early sorting and early partitioning
                 are used in combination with hash-based algorithms for
                 evaluating the join(s) and the grouping. To enable
                 early sorting, the sort order generated at an early
                 stage of the QEP is retained through an arbitrary
                 number of so-called {\em order-preserving hash joins}.
                 To make early partitioning applicable to a large class
                 of decision support queries, we generalize the
                 so-called hash teams proposed by Graefe et al. [GBC98].
                 Hash teams make it possible to perform several
                 hash-based operations (join and grouping) on the same
                 attribute in one pass without repartitioning
                 intermediate results.
                 Our generalization consists of indirectly partitioning
                 the input data. Indirect partitioning means
                 partitioning the input data on an attribute that is not
                 directly needed for the next hash-based operation, and
                 it involves the construction of bitmaps to approximate
                 the partitioning for the attribute that is needed in
                 the next hash-based operation. Our performance
                 experiments show that such QEPs based on {\em early
                 sorting, early partitioning}, or both in combination
                 perform significantly better than conventional
                 strategies for many common classes of decision support
                 queries.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "decision support systems; early sorting and
                 partitioning; hash joins and hash teams; performance
                 evaluation; query processing and optimization",
}
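
%%% The order-preserving hash join at the heart of the early-sorting
%%% strategy can be sketched in a few lines: the probe input arrives in
%%% a sort order established at the leaf, and because matches are
%%% emitted in probe order, the join output inherits that order and no
%%% later sort is needed.  A toy illustration with hypothetical
%%% relation and key names, not the paper's implementation (Python):
%%%
%%%     def order_preserving_hash_join(sorted_probe, build, pkey, bkey):
%%%         # Hash the build input, then stream the pre-sorted probe
%%%         # input; the output preserves the probe sort order.
%%%         table = {}
%%%         for t in build:
%%%             table.setdefault(bkey(t), []).append(t)
%%%         for s in sorted_probe:
%%%             for t in table.get(pkey(s), []):
%%%                 yield dict(s, **t)
%%%
%%%     orders = [{"ok": 2, "date": "1999-02"}, {"ok": 1, "date": "1999-01"}]
%%%     orders.sort(key=lambda o: o["date"])      # early sort at the leaf
%%%     custs = [{"ck": 1, "name": "A"}, {"ck": 2, "name": "B"}]
%%%     rows = list(order_preserving_hash_join(
%%%         orders, custs, lambda o: o["ok"], lambda c: c["ck"]))
%%%     # rows come out in date order without a final sort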

@Article{Jagadish:2000:ODM,
  author =       "H. V. Jagadish and Olga Kapitskaia and Raymond T. Ng
                 and Divesh Srivastava",
  title =        "One-dimensional and multi-dimensional substring
                 selectivity estimation",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "3",
  pages =        "214--230",
  month =        dec,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:54 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090214.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009003/00090214.pdf",
  abstract =     "With the increasing importance of XML, LDAP
                 directories, and text-based information sources on the
                 Internet, there is an ever-greater need to evaluate
                 queries involving (sub)string matching. In many cases,
                 matches need to be on multiple attributes/dimensions,
                 with correlations between the multiple dimensions.
                 Effective query optimization in this context requires
                 good selectivity estimates. In this paper, we use
                 pruned count-suffix trees (PSTs) as the basic data
                 structure for substring selectivity estimation. For the
                 1-D problem, we present a novel technique called MO
                 (Maximal Overlap). We then develop and analyze two 1-D
                 estimation algorithms, MOC and MOLC, based on MO and a
                 constraint-based characterization of all possible
                 completions of a given PST. For the $k$-D problem, we
                 first generalize PSTs to multiple dimensions and
                 develop a space- and time-efficient probabilistic
                 algorithm to construct $k$-D PSTs directly. We then
                 show how to extend MO to multiple dimensions. Finally,
                 we demonstrate, both analytically and experimentally,
                 that MO is both practical and substantially superior to
                 competing algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "maximal overlap; pruned count-suffix tree; short
                 memory property; string selectivity",
}
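
%%% The MO (Maximal Overlap) estimate admits a short sketch.  Here the
%%% pruned count-suffix tree is approximated by a plain dictionary of
%%% retained substring counts, and the parse greedily picks maximal
%%% retained substrings that overlap the part of the query already
%%% parsed; this is an interpretation of the technique, not the
%%% authors' code (Python):
%%%
%%%     def mo_estimate(q, counts, total):
%%%         # counts: substring -> count kept in the pruned tree;
%%%         # total: root count, so Pr(s) = counts[s] / total.
%%%         pr = lambda s: counts.get(s, 0) / float(total)
%%%         end = max((j for j in range(1, len(q) + 1) if q[:j] in counts),
%%%                   default=0)
%%%         if end == 0:
%%%             return 0.0
%%%         est = pr(q[:end])          # Pr(beta_1), longest prefix kept
%%%         while end < len(q):
%%%             nxt = None             # longest kept substring crossing 'end'
%%%             for start in range(end + 1):
%%%                 for stop in range(len(q), end, -1):
%%%                     if q[start:stop] in counts:
%%%                         if nxt is None or stop - start > nxt[1] - nxt[0]:
%%%                             nxt = (start, stop)
%%%                         break
%%%             if nxt is None:
%%%                 return 0.0
%%%             start, stop = nxt
%%%             overlap = q[start:end]
%%%             denom = pr(overlap) if overlap else 1.0
%%%             if denom == 0.0:
%%%                 return 0.0
%%%             # Short-memory step: multiply by Pr(next) / Pr(overlap).
%%%             est *= pr(q[start:stop]) / denom
%%%             end = stop
%%%         return est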

@Article{Manegold:2000:ODA,
  author =       "Stefan Manegold and Peter A. Boncz and Martin L.
                 Kersten",
  title =        "Optimizing database architecture for the new
                 bottleneck: memory access",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "3",
  pages =        "231--246",
  month =        dec,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:54 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090231.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009003/00090231.pdf",
  abstract =     "In the past decade, advances in the speed of commodity
                 CPUs have far out-paced advances in memory latency.
                 Main-memory access is therefore increasingly a
                 performance bottleneck for many computer applications,
                 including database systems. In this article, we use a
                 simple scan test to show the severe impact of this
                 bottleneck. The insights gained are translated into
                 guidelines for database architecture, in terms of both
                 data structures and algorithms. We discuss how
                 vertically fragmented data structures optimize cache
                 performance on sequential data access. We then focus on
                 equi-join, typically a random-access operation, and
                 introduce radix algorithms for partitioned hash-join.
                 The performance of these algorithms is quantified using
                 a detailed analytical model that incorporates memory
                 access cost. Experiments that validate this model were
                 performed on the Monet database system. We obtained
                 exact statistics on events such as TLB misses and L1
                 and L2 cache misses by using hardware performance
                 counters found in modern CPUs. Using our cost model, we
                 show how the carefully tuned memory access pattern of
                 our radix algorithms makes them perform well, which is
                 confirmed by experimental results.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "decomposed storage model; implementation techniques;
                 join algorithms; main-memory databases; memory access
                 optimization; query processing",
}
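
%%% The radix partitioning behind the partitioned hash-join above fits
%%% in a few lines: a small fan-out per pass keeps every partition
%%% buffer cache- and TLB-resident, and co-partitions are then joined
%%% with cache-sized hash tables.  A single-pass toy version, not the
%%% Monet implementation (Python):
%%%
%%%     def radix_partition(tuples, key, bits, shift=0):
%%%         # Fan-out 2**bits on the low-order bits of the (integer)
%%%         # join key; keep bits small so all buffers stay resident.
%%%         mask = (1 << bits) - 1
%%%         parts = [[] for _ in range(1 << bits)]
%%%         for t in tuples:
%%%             parts[(key(t) >> shift) & mask].append(t)
%%%         return parts
%%%
%%%     def radix_hash_join(R, S, rkey, skey, bits=6):
%%%         out = []
%%%         for pr, ps in zip(radix_partition(R, rkey, bits),
%%%                           radix_partition(S, skey, bits)):
%%%             table = {}            # small, cache-resident per partition
%%%             for r in pr:
%%%                 table.setdefault(rkey(r), []).append(r)
%%%             for s in ps:
%%%                 out.extend((r, s) for r in table.get(skey(s), []))
%%%         return out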

@Article{Raman:2000:ODR,
  author =       "Vijayshankar Raman and Bhaskaran Raman and Joseph M.
                 Hellerstein",
  title =        "Online dynamic reordering",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "3",
  pages =        "247--260",
  month =        dec,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:54 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090247.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009003/00090247.pdf",
  abstract =     "We present a pipelining, dynamically tunable {\em
                 reorder\/} operator for providing user control during
                 long running, data-intensive operations. Users can see
                 partial results and accordingly direct the processing
                 by specifying preferences for various data items; data
                 of interest is prioritized for early processing. The
                 reordering mechanism is efficient and non-blocking and
                 can be used over arbitrary data streams from files and
                 indexes, as well as continuous data feeds. We also
                 investigate several policies for the reordering based
                 on the performance goals of various typical
                 applications. We present performance results for
                 reordering in the context of an online aggregation
                 implementation in Informix and in the context of
                 sorting and scrolling in a large-scale spreadsheet. Our
                 experiments demonstrate that for a variety of data
                 distributions and applications, reordering is
                 responsive to dynamic preference changes, imposes
                 minimal overheads in overall completion time, and
                 provides dramatic improvements in the quality of the
                 feedback over time. Surprisingly, preliminary
                 experiments indicate that online reordering can also be
                 useful in traditional batch query processing, because
                 it can serve as a form of pipelined, approximate
                 sorting.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Informix; interactive data processing; online
                 reordering; user control",
}
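
%%% The reorder operator above can be sketched as a prioritized buffer
%%% between producer and consumer: items are delivered in the order of
%%% the current user preference, and a preference change simply
%%% re-heaps whatever is buffered.  A toy version, not the Informix or
%%% spreadsheet implementation (Python):
%%%
%%%     import heapq, itertools
%%%
%%%     class Reorder:
%%%         def __init__(self, priority):
%%%             self.priority = priority           # item -> weight
%%%             self.buf = []
%%%             self.tick = itertools.count()      # FIFO tie-breaker
%%%
%%%         def produce(self, item):               # non-blocking insert
%%%             heapq.heappush(self.buf,
%%%                            (-self.priority(item), next(self.tick), item))
%%%
%%%         def set_priority(self, priority):      # dynamic preference change
%%%             self.priority = priority
%%%             self.buf = [(-priority(i), t, i) for _, t, i in self.buf]
%%%             heapq.heapify(self.buf)
%%%
%%%         def consume(self):                     # best buffered item
%%%             return heapq.heappop(self.buf)[2] if self.buf else None
%%%
%%%     r = Reorder(lambda region: 1.0)
%%%     for region in ("us", "eu", "asia"):
%%%         r.produce(region)
%%%     r.set_priority(lambda region: 2.0 if region == "eu" else 1.0)
%%%     first = r.consume()                        # now "eu" comes first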

@Article{Tan:2000:PEN,
  author =       "Kian-Lee Tan and Cheng Hian Goh and Beng Chin Ooi",
  title =        "Progressive evaluation of nested aggregate queries",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "3",
  pages =        "261--278",
  month =        dec,
  year =         "2000",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:54 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t0009003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/0009003/00090261.htm;
                 http://link.springer.de/link/service/journals/00778/papers/0009003/00090261.pdf",
  abstract =     "In many decision-making scenarios, decision makers
                 require rapid feedback to their queries, which
                 typically involve aggregates. The traditional {\em
                 blocking execution model\/} can no longer meet the
                 demands of these users. One promising approach in the
                 literature, called {\em online aggregation}, evaluates
                 an aggregation query progressively as follows: as soon
                 as certain data have been evaluated, approximate
                 answers are produced with their respective running
                 confidence intervals; as more data are examined, the
                 answers and their corresponding running confidence
                 intervals are refined. In this paper, we extend this
                 approach to handle nested queries with aggregates
                 (i.e., at least one inner query block is an aggregate
                 query) by providing users with (approximate) answers
                 progressively as the inner aggregation query blocks are
                 evaluated. We address the new issues posed by nested
                 queries. In particular, the answer space begins with a
                 superset of the final answers and is refined as the
                 aggregates from the inner query blocks are refined. For
                 the intermediary answers to be meaningful, they have to
                 be interpreted with the aggregates from the inner
                 queries. We also propose a {\em multi-threaded model\/}
                 in evaluating such queries: each query block is
                 assigned to a thread, and the threads can be evaluated
                 concurrently and independently. The time slice across
                 the threads is {\em nondeterministic\/} in the sense
                 that the user controls the relative rate at which these
                 subqueries are being evaluated. For {\em enumerative\/}
                 nested queries, we propose a priority-based evaluation
                 strategy to present answers that are certainly in the
                 final answer space first, before presenting those whose
                 validity may be affected as the inner query aggregates
                 are refined. We implemented a prototype system using
                 Java and evaluated our system. Results for nested
                 queries with a single level and with multiple levels
                 of nesting are reported. Our results show the
                 effectiveness of the
                 proposed mechanisms in providing progressive feedback
                 that reduces the initial waiting time of users
                 significantly without sacrificing the quality of the
                 answers.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "approximate answers; multi-threading; nested aggregate
                 queries; online aggregation; progressive query
                 processing",
}
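
%%% The running confidence intervals that drive this style of
%%% progressive feedback can be computed in one pass.  A sketch for a
%%% single-block AVG over a random-order scan, using Welford's update
%%% and a normal-approximation interval; the nested, multi-threaded
%%% machinery of the paper is not shown (Python):
%%%
%%%     import math, random
%%%
%%%     def online_avg(stream, z=1.96):
%%%         # Yields (running mean, 95% half-width) after each tuple.
%%%         n = mean = m2 = 0.0
%%%         for x in stream:
%%%             n += 1
%%%             delta = x - mean
%%%             mean += delta / n
%%%             m2 += delta * (x - mean)
%%%             if n > 1:
%%%                 half = z * math.sqrt(m2 / (n - 1) / n)
%%%                 yield mean, half
%%%
%%%     data = [random.gauss(50, 10) for _ in range(10000)]
%%%     random.shuffle(data)        # random-order scan assumption
%%%     for mean, half in online_avg(data):
%%%         pass                    # the interval narrows as n grows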

@Article{Ngu:2001:CMV,
  author =       "Anne H. H. Ngu and Quan Z. Sheng and Du Q. Huynh and
                 Ron Lei",
  title =        "Combining multi-visual features for efficient indexing
                 in a large image database",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "4",
  pages =        "279--293",
  month =        apr,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100028",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:55 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090279.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1009004/10090279.pdf",
  abstract =     "The optimized distance-based access methods currently
                 available for multidimensional indexing in multimedia
                 databases have been developed based on two major
                 assumptions: a suitable distance function is known a
                 priori and the dimensionality of the image features is
                 low. It is not trivial to define a distance function
                 that best mimics human visual perception regarding
                 image similarity measurements. Reducing
                 high-dimensional features in images using the popular
                 principal component analysis (PCA) might not always be
                 possible due to the non-linear correlations that may be
                 present in the feature vectors. We propose in this
                 paper a fast and robust hybrid method for non-linear
                 dimensionality reduction of composite image features
                 for indexing in large image databases. This method
                 incorporates both the PCA and non-linear neural network
                 techniques to reduce the dimensions of feature vectors
                 so that an optimized access method can be applied. To
                 incorporate human visual perception into our system, we
                 also conducted experiments that involved a number of
                 subjects classifying images into different classes for
                 neural network training. We demonstrate that not only
                 can our neural network system reduce the dimensions of
                 the feature vectors, but that the reduced dimensional
                 feature vectors can also be mapped to an optimized
                 access method for fast and accurate indexing.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "high-dimensional indexing; image retrieval; neural
                 network",
}
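
%%% The PCA half of the hybrid reduction above is a one-liner on top of
%%% an SVD; the non-linear neural-network stage trained on the
%%% human-classified images is not shown.  A sketch with hypothetical
%%% sizes (Python):
%%%
%%%     import numpy as np
%%%
%%%     def pca_reduce(X, k):
%%%         # Project rows of X onto the top-k principal components.
%%%         Xc = X - X.mean(axis=0)
%%%         _, _, Vt = np.linalg.svd(Xc, full_matrices=False)
%%%         return Xc.dot(Vt[:k].T)
%%%
%%%     X = np.random.rand(500, 120)   # 500 images, 120-d feature vectors
%%%     Y = pca_reduce(X, 10)          # 10-d vectors for the access method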

@Article{Combi:2001:HTD,
  author =       "Carlo Combi and Giuseppe Pozzi",
  title =        "{{\em HMAP\/}} --- a temporal data model managing
                 intervals with different granularities and
                 indeterminacy from natural language sentences",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "4",
  pages =        "294--311",
  month =        apr,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100033",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:55 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090294.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1009004/10090294.pdf",
  abstract =     "The {\em granularity\/} of given temporal information
                 is the level of abstraction at which information is
                 expressed. Different units of measure allow one to
                 represent different granularities. Indeterminacy is
                 often present in temporal information given at
                 different granularities: temporal {\em indeterminacy\/}
                 is related to incomplete knowledge of when the
                 considered fact happened. Focusing on temporal
                 databases, different granularities and indeterminacy
                 have to be considered in expressing valid time, i.e.,
                 the time at which the information is true in the
                 modeled reality. In this paper, we propose {\em HMAP\/}
                 (The term is the transliteration of an ancient Greek
                 poetical word meaning ``day''.), a temporal data model
                 extending the capability of defining valid times with
                 different granularity and/or with indeterminacy. In
                 {\em HMAP}, absolute intervals are explicitly
                 represented by their {\em start}, {\em end}, and {\em
                 duration\/}: in this way, we can represent valid times
                 as ``in December 1998 for five hours'', ``from July
                 1995, for 15 days'', ``from March 1997 to October 15,
                 1997, between 6 and 6:30 p.m.''. {\em HMAP\/} is based
                 on a three-valued logic, for managing uncertainty in
                 temporal relationships. Formulas involving different
                 temporal relationships between intervals, instants, and
                 durations can be defined, allowing one to query the
                 database with different granularities, not necessarily
                 related to that of the data. In this paper, we also
                 discuss the complexity of the algorithms that evaluate
                 {\em HMAP\/} formulas, and show that the formulas can
                 be expressed as constraint networks falling into the
                 class of simple temporal problems, which can be solved
                 in polynomial time.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "temporal databases; three-valued logic; time
                 granularity; time indeterminacy",
}
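
%%% The three-valued comparisons that HMAP builds on are easy to
%%% illustrate: an indeterminate instant is known only to lie within
%%% some support interval (after conversion to a common granularity),
%%% and a relationship evaluates to true, false, or unknown.  A generic
%%% sketch, not the HMAP model itself (Python):
%%%
%%%     UNKNOWN = None    # third truth value
%%%
%%%     def before(a, b):
%%%         # a, b: (lo, hi) bounds, in a common finest granularity,
%%%         # on when each indeterminate instant may have occurred.
%%%         if a[1] < b[0]:
%%%             return True       # a certainly precedes b
%%%         if a[0] >= b[1]:
%%%             return False      # a certainly does not precede b
%%%         return UNKNOWN        # supports overlap: indeterminate
%%%
%%%     def t_and(p, q):
%%%         # Kleene conjunction over {True, False, UNKNOWN}.
%%%         if p is False or q is False:
%%%             return False
%%%         if p is True and q is True:
%%%             return True
%%%         return UNKNOWN
%%%
%%%     print(before((1, 4), (6, 9)))   # True
%%%     print(before((1, 7), (5, 9)))   # None, i.e., unknown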

@Article{Li:2001:SEM,
  author =       "Wen-Syan Li and K. Sel{\c{c}}uk Candan and Kyoji
                 Hirata and Yoshinori Hara",
  title =        "Supporting efficient multimedia database exploration",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "4",
  pages =        "312--326",
  month =        apr,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100040",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:55 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090312.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1009004/10090312.pdf",
  abstract =     "Due to the fuzziness of query specification and media
                 matching, multimedia retrieval is conducted by way of
                 exploration. It is essential to provide feedback so
                 that users can visualize query reformulation
                 alternatives and database content distribution. Since
                 media matching is an expensive task, another issue is
                 how to efficiently support exploration so that the
                 system is not overloaded by perpetual query
                 reformulation. In this paper, we present a uniform
                 framework to represent statistical information of both
                 semantics and visual metadata for images in the
                 databases. We propose the concept of {\em query
                 verification}, which evaluates queries using
                 statistics, and provides users with feedback, including
                 the strictness and reformulation alternatives of each
                 query condition as well as estimated numbers of
                 matches. With query verification, the system increases
                 the efficiency of the multimedia database exploration
                 for both users and the system. Such statistical
                 information is also utilized to support progressive
                 query processing and query relaxation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "exploration; human computer interaction; multimedia
                 database; progressive processing; query relaxation;
                 selectivity statistics",
}

@Article{Lee:2001:GTM,
  author =       "Chiang Lee and Chi-Sheng Shih and Yaw-Huei Chen",
  title =        "A graph-theoretic model for optimizing queries
                 involving methods",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "4",
  pages =        "327--343",
  month =        apr,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100035",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:55 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090327.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1009004/10090327.pdf",
  abstract =     "Traditional algorithms for optimizing the execution
                 order of joins are no more valid when selections and
                 projections involve methods and become very expensive
                 operations. Selections and projections could be even
                 more costly than joins such that they are pulled above
                 joins, rather than pushed down in a query tree. In this
                 paper, we take a fundamental look at how to approach
                 query optimization from a top-down design perspective,
                 rather than trying to force one model to fit into
                 another. We present a graph model which is designed to
                 characterize execution plans. Each edge and each vertex
                 of the graph is assigned a weight to model execution
                 plans. We also design algorithms that use these weights
                 to optimize the execution order of operations. A cost
                 model of these algorithms is developed. Experiments are
                 conducted on the basis of this cost model. The results
                 show that our algorithms are superior to similar work
                 proposed in the literature.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "graph model; method query; object-oriented databases;
                 query optimization; spanning tree",
}

@Article{Wang:2001:IVH,
  author =       "Changzhou Wang and X. Sean Wang",
  title =        "Indexing very high-dimensional sparse and quasi-sparse
                 vectors for similarity searches",
  journal =      j-VLDB-J,
  volume =       "9",
  number =       "4",
  pages =        "344--361",
  month =        apr,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100036",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:55 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1009004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1009004/10090344.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1009004/10090344.pdf",
  abstract =     "Similarity queries on complex objects are usually
                 translated into searches among their feature vectors.
                 This paper studies indexing techniques for very
                 high-dimensional (e.g., in hundreds) vectors that are
                 sparse or quasi-sparse, i.e., vectors {\em each\/}
                 having only a small number (e.g., ten) of non-zero or
                 significant values. Based on the R-tree, the paper
                 introduces the xS-tree that uses lossy compression of
                 bounding regions to guarantee a reasonable minimum
                 fan-out within the allocated storage space for each
                 node. In addition, the paper studies the performance
                 and scalability of the xS-tree via experiments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "high-dimensional indexing structure; lossy
                 compression; quasi-sparse vector; similarity search;
                 sparse vector",
}
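
%%% The lossy compression of bounding regions can be sketched with a
%%% fixed-width bitmask over the active dimensions: several dimensions
%%% may share a bit, so containment tests admit false positives but
%%% never false negatives, and node size (hence fan-out) stays bounded.
%%% A toy stand-in for the xS-tree's scheme, with a hypothetical mask
%%% width (Python):
%%%
%%%     def compress_dims(dims, m=64):
%%%         # Fold the set of non-zero dimensions into an m-bit mask.
%%%         mask = 0
%%%         for d in dims:
%%%             mask |= 1 << (d % m)
%%%         return mask
%%%
%%%     def may_contain(node_mask, query_dims, m=64):
%%%         # Superset test on masks: no false negatives.
%%%         q = compress_dims(query_dims, m)
%%%         return node_mask & q == q
%%%
%%%     node = compress_dims({3, 71, 130})
%%%     print(may_contain(node, {3}))    # True
%%%     print(may_contain(node, {5}))    # False: certainly absent
%%%     print(may_contain(node, {66}))   # True, but a false positive:
%%%                                      # 66 and 130 share bit 2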

@Article{Casati:2001:GE,
  author =       "Fabio Casati and Ming-Chien Shan and Dimitrios
                 Georgakopoulos",
  title =        "Guest editorial",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "1--1",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100041",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100001.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010001/10100001.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mecella:2001:DWC,
  author =       "Massimo Mecella and Barbara Pernici",
  title =        "Designing wrapper components for e-services in
                 integrating heterogeneous systems",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "2--15",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100044",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100002.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010001/10100002.pdf",
  abstract =     "Component-based approaches are becoming more and more
                 popular to support Internet-based application
                 development. Different component modeling approaches,
                 however, can be adopted, obtaining different
                 abstraction levels (either conceptual or operational).
                 In this paper we present a component-based architecture
                 for the design of e-applications, and discuss the
                 concept of wrapper components as building blocks for
                 the development of e-services, where these services are
                 based on legacy systems. We discuss their
                 characteristics and their applicability in
                 Internet-based application development.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "component; cooperation; e-application; e-service;
                 integration; legacy system; wrapper",
}

@Article{Eyal:2001:ICH,
  author =       "Anat Eyal and Tova Milo",
  title =        "Integrating and customizing heterogeneous e-commerce
                 applications",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "16--38",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100045",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100016.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010001/10100016.pdf",
  abstract =     "A broad spectrum of electronic commerce applications
                 is currently available on the Web, providing services
                 in almost any area one can think of. As the number and
                 variety of such applications grow, more business
                 opportunities emerge for providing new services based
                 on the integration and customization of existing
                 applications. (Web shopping malls and support for
                 comparative shopping are just a couple of examples.)
                 Unfortunately, the diversity of applications in each
                 specific domain and the disparity of interfaces,
                 application flows, actor roles in the business
                 transaction, and data formats render the integration
                 and manipulation of applications a rather difficult
                 task. In this paper we present the {\em Application
                 Manifold\/} system, aimed at simplifying the intricate
                 task of integration and customization of e-commerce
                 applications. The scope of the work in this paper is
                 limited to web-enabled e-commerce applications. We do
                 not support the integration/customization of
                 proprietary/legacy applications. The wrapping of such
                 applications as web services is complementary to our
                 work. Based on the emerging Web data standard, XML, and
                 application modeling standard, UML, the system offers a
                 novel declarative specification language for describing
                 the integration/customization task, supporting a
                 modular approach where new applications can be added
                 and integrated at will with minimal effort. Then,
                 acting as an application generator, the system
                 generates a full integrated/customized e-commerce
                 application, with the declarativity of the
                 specification allowing for the optimization and
                 verification of the generated application. The
                 integration here deals with the full profile of the
                 given e-commerce applications: the various services
                 offered by the applications, the activities and roles
                 of the different actors participating in the
                 application (e.g., customers, vendors), the application
                 flow, as well as with the data involved in the process.
                 This is in contrast to previous works on Web data
                 integration that focused primarily on querying the data
                 available in the applications, mostly ignoring the
                 additional aspects mentioned above.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "application integration; data integration; electronic
                 commerce",
}

@Article{Bonifati:2001:ARX,
  author =       "Angela Bonifati and Stefano Ceri and Stefano
                 Paraboschi",
  title =        "Active rules for {XML}: a new paradigm for
                 {E}-services",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "39--47",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100039",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100039.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010001/10100039.pdf",
  abstract =     "XML is rapidly becoming one of the most widely adopted
                 technologies for information exchange and
                 representation. As the use of XML becomes more
                 widespread, we foresee the development of active XML
                 rules, i.e., rules explicitly designed for the
                 management of XML information. In particular, we argue
                 that active rules for XML offer a natural paradigm for
                 the rapid development of innovative e-services. In the
                 paper, we show how active rules can be specified in the
                 context of XSLT, a pattern-based language for
                 publishing XML documents (promoted by the W3C) which is
                 receiving strong commercial support, and Lorel, a query
                 language for XML documents that is quite popular in the
                 research world. We demonstrate, through simple examples
                 of active rules for XSLT and Lorel, that active rules
                 can be effective for the implementation of e-commerce
                 services. We also discuss the various issues that need
                 to be considered in adapting the notion of relational
                 triggers to the XML context.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "active databases; document management; query languages
                 for XML; XML; XSLT",
}
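
%%% The event-condition-action pattern behind active XML rules can be
%%% sketched over a toy document held as nested dictionaries; the
%%% paper's proposals bind rules to XSLT patterns or Lorel queries,
%%% which this generic sketch does not attempt (Python):
%%%
%%%     class ActiveDoc:
%%%         def __init__(self, doc):
%%%             self.doc, self.rules = doc, []
%%%
%%%         def on(self, event, condition, action):
%%%             self.rules.append((event, condition, action))
%%%
%%%         def update(self, path, value):         # the triggering event
%%%             node = self.doc
%%%             for step in path[:-1]:
%%%                 node = node[step]
%%%             node[path[-1]] = value
%%%             for event, cond, act in self.rules:
%%%                 if event == "update" and cond(self.doc, path):
%%%                     act(self.doc, path)        # fire the rule
%%%
%%%     doc = ActiveDoc({"order": {"total": 0, "status": "open"}})
%%%
%%%     def review(d, path):
%%%         d["order"]["status"] = "review"
%%%
%%%     doc.on("update",
%%%            lambda d, path: d["order"]["total"] > 100,
%%%            review)
%%%     doc.update(("order", "total"), 250)   # rule fires, flags the order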

@Article{Braumandl:2001:OUQ,
  author =       "R. Braumandl and M. Keidl and A. Kemper and D.
                 Kossmann and A. Kreutz and S. Seltzsam and K. Stocker",
  title =        "{ObjectGlobe}: {Ubiquitous} query processing on the
                 {Internet}",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "48--71",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100043",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100048.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010001/10100048.pdf",
  abstract =     "We present the design of ObjectGlobe, a distributed
                 and open query processor for Internet data sources.
                 Today, data is published on the Internet via Web
                 servers which have, if at all, very localized query
                 processing capabilities. The goal of the ObjectGlobe
                 project is to establish an open marketplace in which
                 {\em data\/} and {\em query processing capabilities\/}
                 can be distributed and used by any kind of Internet
                 application. Furthermore, ObjectGlobe integrates {\em
                 cycle providers\/} (i.e., machines) which carry out
                 query processing operators. The overall picture is to
                 make it possible to execute a query with --- in
                 principle --- unrelated query operators, cycle
                 providers, and data sources. Such an infrastructure can
                 serve as enabling technology for scalable e-commerce
                 applications, e.g., B2B and B2C market places, to be
                 able to integrate data and data processing operations
                 of a large number of participants. One of the main
                 challenges in the design of such an open system is to
                 ensure privacy and security. We discuss the ObjectGlobe
                 security requirements, show how basic components such
                 as the optimizer and runtime system need to be
                 extended, and present the results of performance
                 experiments that assess the additional cost for secure
                 distributed query processing. Another challenge is
                 quality of service management so that users can
                 constrain the costs and running times of their
                 queries.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "cycle-; distributed query processing; function- and
                 data provider; open systems; privacy; quality of
                 service; query optimization; security",
}

@Article{Su:2001:IBN,
  author =       "Stanley Y. W. Su and Chunbo Huang and Joachim Hammer
                 and Yihua Huang and Haifei Li and Liu Wang and Youzhong
                 Liu and Charnyote Pluempitiwiriyawej and Minsoo Lee and
                 Herman Lam",
  title =        "An {Internet}-based negotiation server for
                 e-commerce",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "72--90",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100051",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100072.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010001/10100072.pdf",
  abstract =     "This paper describes the design and implementation of
                 a replicable, Internet-based negotiation server for
                 conducting bargaining-type negotiations between
                 enterprises involved in e-commerce and e-business.
                 Enterprises can be buyers and sellers of
                 products/services or participants of a complex supply
                 chain engaged in purchasing, planning, and scheduling.
                 Multiple copies of our server can be installed to
                 complement the services of Web servers. Each enterprise
                 can install or select a trusted negotiation server to
                 represent his/her interests. Web-based GUI tools are
                 used during the build-time registration process to
                 specify the requirements, constraints, and rules that
                 represent negotiation policies and strategies,
                 preference scoring of different data conditions, and
                 aggregation methods for deriving a global cost-benefit
                 score for the item(s) under negotiation. The
                 registration information is used by the negotiation
                 servers to automatically conduct bargaining-type
                 negotiations on behalf of their clients. In this paper,
                 we present the architecture of our implementation as
                 well as a framework for automated negotiations, and
                 describe a number of communication primitives which are
                 used in the underlying negotiation protocol. A
                 constraint satisfaction processor (CSP) is used to
                 evaluate a negotiation proposal or counterproposal
                 against the registered requirements and constraints of
                 a client company. In case of a constraint violation, an
                 event is posted to trigger the execution of negotiation
                 strategic rules, which either automatically relax the
                 violated constraint, ask for human intervention, invoke
                 an application, or perform other remedial operations.
                 An Event-Trigger-Rule (ETR) server is used to manage
                 events, triggers, and rules. Negotiation strategic
                 rules can be added or modified at run-time. A
                 cost-benefit analysis component is used to perform
                 quantitative analysis of alternatives. The use of
                 negotiation servers to conduct automated negotiation
                 has been demonstrated in the context of an integrated
                 supply chain scenario.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "constraint evaluation; cost-benefit analysis;
                 database; e-commerce; negotiation policy and strategy;
                 negotiation protocol",
}
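
%%% The constraint-checking loop at the core of such a negotiation
%%% server can be sketched briefly: a proposal is evaluated against the
%%% registered constraints, and a violation triggers a strategic rule
%%% that may relax the constraint or force a counterproposal.  All
%%% names and limits here are hypothetical (Python):
%%%
%%%     def evaluate_proposal(proposal, constraints, relax_rules):
%%%         for name, check in list(constraints.items()):
%%%             if not check(proposal):
%%%                 rule = relax_rules.get(name)   # strategic rule, if any
%%%                 if rule is None:
%%%                     return "reject", name
%%%                 constraints[name] = rule(check)
%%%                 if not constraints[name](proposal):
%%%                     return "counter", name     # still violated
%%%         return "accept", None
%%%
%%%     constraints = {"price": lambda p: p["price"] <= 100}
%%%     relax = {"price": lambda old: (lambda p: p["price"] <= 110)}
%%%     print(evaluate_proposal({"price": 105}, constraints, relax))
%%%     # ('accept', None) after one automatic relaxation
%%%     print(evaluate_proposal({"price": 140}, constraints, relax))
%%%     # ('counter', 'price')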

@Article{Shegalov:2001:XEW,
  author =       "German Shegalov and Michael Gillmann and Gerhard
                 Weikum",
  title =        "{XML}-enabled workflow management for e-services
                 across heterogeneous platforms",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "91--103",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100038",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100091.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010001/10100091.pdf",
  abstract =     "Advanced e-services require efficient, flexible, and
                 easy-to-use workflow technology that integrates well
                 with mainstream Internet technologies such as XML and
                 Web servers. This paper discusses an XML-enabled
                 architecture for distributed workflow management that
                 is implemented in the latest version of our Mentor-lite
                 prototype system. The key asset of this architecture is
                 an XML mediator that handles the exchange of business
                 and flow control data between workflow and
                 business-object servers on the one hand and client
                  activities on the other via XML messages over HTTP. Our
                 implementation of the mediator has made use of Oracle's
                 XSQL servlet. The major benefit of the advocated
                 architecture is that it provides seamless integration
                 of client applications into e-service workflows with
                 scalable efficiency and very little explicit coding, in
                  contrast to an earlier, Java-based version of our
                 Mentor-lite prototype that required much more code and
                 exhibited potential performance problems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "business processes; information system
                 interoperability; Internet e-services; workflow
                 management; XML/XSL",
}

@Article{Datta:2001:ASS,
  author =       "Anindya Datta and Kaushik Dutta and Debra VanderMeer
                 and Krithi Ramamritham and Shamkant B. Navathe",
  title =        "An architecture to support scalable online
                 personalization on the {Web}",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "1",
  pages =        "104--117",
  month =        aug,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100037",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:56 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010001/10100104.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010001/10100104.pdf",
  abstract =     "Online personalization is of great interest to
                 e-companies. Virtually all personalization technologies
                 are based on the idea of storing as much historical
                 customer session data as possible, and then querying
                 the data store as customers navigate through a web
                 site. The holy grail of online personalization is an
                 environment where fine-grained, detailed historical
                 session data can be queried based on current online
                 navigation patterns for use in formulating real-time
                 responses. Unfortunately, as more consumers become
                 e-shoppers, the user load and the amount of historical
                 data continue to increase, causing scalability-related
                 problems for almost all current personalization
                 technologies. This paper chronicles the development of
                 a real-time interaction management system through the
                 integration of historical data and online visitation
                 patterns of e-commerce site visitors. It describes the
                 scientific underpinnings of the system as well as its
                 architecture. Experimental evaluation of the system
                 shows that the caching and storage techniques built
                 into the system deliver performance that is orders of
                  magnitude better than that derived from off-the-shelf
                 database components.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "behavior-based personalization; dynamic lookahead
                 profile; profile caching; scalable online
                 personalization; Web site and interaction model",
}

@Article{ElAbbadi:2001:GE,
  author =       "Amr {El Abbadi} and Gunter Schlageter and Kyu-Young
                 Whang",
  title =        "Guest editorial",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "119--119",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100053",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100119.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010002/10100119.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Pucheral:2001:PSD,
  author =       "Philippe Pucheral and Luc Bouganim and Patrick
                 Valduriez and Christophe Bobineau",
  title =        "{PicoDBMS}: {Scaling} down database techniques for the
                 smartcard",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "120--132",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100047",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100120.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010002/10100120.pdf",
  abstract =     "Smartcards are the most secure portable computing
                  devices today. They have been used successfully in
                 applications involving money, and proprietary and
                 personal data (such as banking, healthcare, insurance,
                 etc.). As smartcards get more powerful (with 32-bit CPU
                 and more than 1 MB of stable memory in the next
                 versions) and become multi-application, the need for
                 database management arises. However, smartcards have
                 severe hardware limitations (very slow write, very
                 little RAM, constrained stable memory, no autonomy,
                 etc.) which make traditional database technology
                 irrelevant. The major problem is scaling down database
                 techniques so they perform well under these
                 limitations. In this paper, we give an in-depth
                 analysis of this problem and propose a PicoDBMS
                 solution based on highly compact data structures, query
                 execution without RAM, and specific techniques for
                 atomicity and durability. We show the effectiveness of
                 our techniques through performance evaluation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "atomicity; durability; execution model; PicoDBMS;
                 query optimization; smartcard applications; storage
                 model",
}

@Article{Shanmugasundaram:2001:EPR,
  author =       "Jayavel Shanmugasundaram and Eugene Shekita and Rimon
                 Barr and Michael Carey and Bruce Lindsay and Hamid
                 Pirahesh and Berthold Reinwald",
  title =        "Efficiently publishing relational data as {XML}
                 documents",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "133--154",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100052",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100133.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010002/10100133.pdf",
  abstract =     "XML is rapidly emerging as a standard for exchanging
                 business data on the World Wide Web. For the
                 foreseeable future, however, most business data will
                 continue to be stored in relational database systems.
                 Consequently, if XML is to fulfill its potential, some
                 mechanism is needed to publish relational data as XML
                 documents. Towards that goal, one of the major
                 challenges is finding a way to efficiently structure
                 and tag data from one or more tables as a hierarchical
                 XML document. Different alternatives are possible
                 depending on when this processing takes place and how
                 much of it is done inside the relational engine. In
                 this paper, we characterize and study the performance
                 of these alternatives. Among other things, we explore
                 the use of new scalar and aggregate functions in SQL
                 for constructing complex XML documents directly in the
                 relational engine. We also explore different execution
                 plans for generating the content of an XML document.
                 The results of an experimental study show that
                 constructing XML documents inside the relational engine
                 can have a significant performance benefit. Our results
                 also show the superiority of having the relational
                 engine use what we call an ``outer union plan'' to
                 generate the content of an XML document.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "publishing; relational databases; XML",
}

@Article{Chang:2001:AQM,
  author =       "Kevin Chen-Chuan Chang and H{\'e}ctor
                 Garc{\'\i}a-Molina",
  title =        "Approximate query mapping: {Accounting} for
                 translation closeness",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "155--181",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100042",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100155.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010002/10100155.pdf",
  abstract =     "In this paper we present a mechanism for approximately
                 translating Boolean query constraints across
                 heterogeneous information sources. Achieving the best
                 translation is challenging because sources support
                 different constraints for formulating queries, and
                 often these constraints cannot be precisely translated.
                 For instance, a query [score>8] might be ``perfectly''
                 translated as [rating>0.8] at some site, but can only
                 be approximated as [grade=A] at another. Unlike other
                 work, our general framework adopts a customizable
                 ``closeness'' metric for the translation that combines
                 both precision and recall. Our results show that for
                 query translation we need to handle interdependencies
                  among both query conjuncts and disjuncts. As the
                 basis, we identify the essential requirements of a rule
                 system for users to encode the mappings for atomic
                 semantic units. Our algorithm then translates complex
                 queries by rewriting them in terms of the semantic
                 units. We show that, under practical assumptions, our
                 algorithm generates the best approximate translations
                 with respect to the closeness metric of choice. We also
                 present a case study to show how our technique may be
                 applied in practice.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "approximate query translation; closeness;
                 constraint-mapping; information integration;
                 mediators",
}

@Article{Pottinger:2001:MSA,
  author =       "Rachel Pottinger and Alon Halevy",
  title =        "{MiniCon}: a scalable algorithm for answering
                 queries using views",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "182--198",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100048",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100182.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010002/10100182.pdf",
  abstract =     "The problem of answering queries using views is to
                 find efficient methods of answering a query using a set
                 of previously materialized views over the database,
                 rather than accessing the database relations. The
                 problem has received significant attention because of
                 its relevance to a wide variety of data management
                 problems, such as data integration, query optimization,
                 and the maintenance of physical data independence. To
                 date, the performance of proposed algorithms has
                 received very little attention, and in particular,
                 their scale up in the presence of a large number of
                  views is unknown. We first analyze two previous
                  algorithms, the bucket algorithm and the
                  inverse-rules algorithm, and show their
                  deficiencies. We then describe MiniCon, a novel
                  algorithm for finding the maximally-contained
                  rewriting of a conjunctive query using a set of
                  conjunctive views. We present the first
                  experimental study of algorithms for answering
                  queries using views. The study shows that MiniCon
                  scales up well and significantly outperforms the
                  previous algorithms. We describe an extension of
                  MiniCon to handle comparison predicates, and show
                  its performance experimentally. Finally, we
                  describe how MiniCon can be extended to the
                  context of query optimization.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data integration; materialized views; query
                 optimization; Web and databases",
}

@Article{Chakrabarti:2001:AQP,
  author =       "Kaushik Chakrabarti and Minos Garofalakis and Rajeev
                 Rastogi and Kyuseok Shim",
  title =        "Approximate query processing using wavelets",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "199--223",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100049",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100199.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010002/10100199.pdf",
  abstract =     "Approximate query processing has emerged as a
                 cost-effective approach for dealing with the huge data
                 volumes and stringent response-time requirements of
                 today's decision support systems (DSS). Most work in
                 this area, however, has so far been limited in its
                 query processing scope, typically focusing on specific
                 forms of aggregate queries. Furthermore, conventional
                 approaches based on sampling or histograms appear to be
                 inherently limited when it comes to approximating the
                 results of complex queries over high-dimensional DSS
                 data sets. In this paper, we propose the use of
                 multi-dimensional wavelets as an effective tool for
                 general-purpose approximate query processing in modern,
                 high-dimensional applications. Our approach is based on
                 building {\em wavelet-coefficient synopses\/} of the
                 data and using these synopses to provide approximate
                 answers to queries. We develop novel query processing
                 algorithms that operate directly on the
                 wavelet-coefficient synopses of relational tables,
                 allowing us to process arbitrarily complex queries {\em
                 entirely\/} in the wavelet-coefficient domain. This
                 guarantees extremely fast response times since our
                 approximate query execution engine can do the bulk of
                 its processing over compact sets of wavelet
                 coefficients, essentially postponing the expansion into
                 relational tuples until the end-result of the query. We
                 also propose a novel wavelet decomposition algorithm
                 that can build these synopses in an I/O-efficient
                 manner. Finally, we conduct an extensive experimental
                 study with synthetic as well as real-life data sets to
                 determine the effectiveness of our wavelet-based
                 approach compared to sampling and histograms. Our
                 results demonstrate that our techniques: (1) provide
                 approximate answers of better quality than either
                 sampling or histograms; (2) offer query execution-time
                 speedups of more than two orders of magnitude; and (3)
                 guarantee extremely fast synopsis construction times
                 that scale linearly with the size of the data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "approximate query answers; data synopses; query
                 processing; wavelet decomposition",
}

@Article{Sarawagi:2001:UCM,
  author =       "Sunita Sarawagi",
  title =        "User-cognizant multidimensional analysis",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "2--3",
  pages =        "224--239",
  month =        sep,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100046",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:58 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010002/10100224.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010002/10100224.pdf",
  abstract =     "Our goal is to enhance multidimensional database
                 systems with a suite of advanced operators to automate
                 data analysis tasks that are currently handled through
                 manual exploration. In this paper, we present a key
                 component of our system that characterizes the
                 information content of a cell based on a user's prior
                 familiarity with the cube and provides a
                  context-sensitive exploration of the cube. This
                  component has three main modules: a Tracker, which
                  continuously tracks the parts of the cube that a
                  user has visited; a Modeler, which pieces together
                  the information in the visited parts to model the
                  user's expected values in the unvisited parts; and
                  an Informer, which processes the user's queries
                  about the most informative unvisited parts of the
                  cube. The
                 mathematical basis for the expected value modeling is
                 provided by the classical maximum entropy principle.
                 Accordingly, the expected values are computed so as to
                 agree with every value that is already visited while
                 reducing assumptions about unvisited values to the
                 minimum by maximizing their entropy. The most
                 informative values are defined as those that bring the
                 new expected values closest to the actual values. We
                  argue, and demonstrate through experiments, that such a
                 user-in-the-loop exploration will enable much faster
                 assimilation of all significant information in the data
                 compared to existing manual explorations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "maximum entropy; multidimensional data exploration;
                 OLAP; personalized mining; user-sensitive interest
                 measure",
}

@Article{Turker:2001:SIS,
  author =       "Can T{\"u}rker and Michael Gertz",
  title =        "Semantic integrity support in {SQL:1999} and
                 commercial (object-)relational database management
                 systems",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "4",
  pages =        "241--269",
  month =        dec,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100050",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:59 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100241.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010004/10100241.pdf",
  abstract =     "The correctness of the data managed by database
                 systems is vital to any application that utilizes data
                 for business, research, and decision-making purposes.
                 To guard databases against erroneous data not
                 reflecting real-world data or business rules, semantic
                 integrity constraints can be specified during database
                 design. Current commercial database management systems
                 provide various means to implement mechanisms to
                 enforce semantic integrity constraints at database
                 run-time. In this paper, we give an overview of the
                 semantic integrity support in the most recent
                  SQL standard, SQL:1999, and we show to what extent the
                 different concepts and language constructs proposed in
                 this standard can be found in major commercial
                 (object-)relational database management systems. In
                 addition, we discuss general design guidelines that
                 point out how the semantic integrity features provided
                 by these systems should be utilized in order to
                 implement an effective integrity enforcing subsystem
                 for a database.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "constraint enforcement; object-relational databases;
                 semantic integrity constraints; SQL:1999",
}

@Article{Halevy:2001:AQU,
  author =       "Alon Y. Halevy",
  title =        "Answering queries using views: a survey",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "4",
  pages =        "270--294",
  month =        dec,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100054",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:59 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100270.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010004/10100270.pdf",
  abstract =     "The problem of answering queries using views is to
                 find efficient methods of answering a query using a set
                 of previously defined materialized views over the
                 database, rather than accessing the database relations.
                 The problem has recently received significant attention
                 because of its relevance to a wide variety of data
                 management problems. In query optimization, finding a
                 rewriting of a query using a set of materialized views
                 can yield a more efficient query execution plan. To
                 support the separation of the logical and physical
                 views of data, a storage schema can be described using
                 views over the logical schema. As a result, finding a
                 query execution plan that accesses the storage amounts
                 to solving the problem of answering queries using
                 views. Finally, the problem arises in data integration
                 systems, where data sources can be described as
                 precomputed views over a mediated schema. This article
                 surveys the state of the art on the problem of
                 answering queries using views, and synthesizes the
                 disparate works into a coherent framework. We describe
                 the different applications of the problem, the
                 algorithms proposed to solve it and the relevant
                 theoretical results.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data integration; date warehousing; materialized
                 views; query optimization; survey; Web-site
                 management",
}

@Article{Laurent:2001:MCI,
  author =       "D. Laurent and J. Lechtenb{\"o}rger and N. Spyratos
                 and G. Vossen",
  title =        "Monotonic complements for independent data
                 warehouses",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "4",
  pages =        "295--315",
  month =        dec,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100055",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:59 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100295.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010004/10100295.pdf",
  abstract =     "Views over databases have regained attention in the
                 context of data warehouses, which are seen as {\em
                 materialized\/} views. In this setting, efficient view
                 maintenance is an important issue, for which the notion
                 of {\em self-maintainability\/} has been identified as
                 desirable. In this paper, we extend the concept of
                 self-maintainability to (query and update) {\em
                 independence\/} within a formal framework, where
                 independence with respect to arbitrary given sets of
                 queries and updates over the sources can be guaranteed.
                 To this end we establish an intuitively appealing
                 connection between warehouse independence and {\em view
                 complements}. Moreover, we study special kinds of
                 complements, namely {\em monotonic complements}, and
                 show how to compute minimal ones in the presence of
                 keys and foreign keys in the underlying databases.
                 Taking advantage of these complements, an algorithmic
                 approach is proposed for the specification of
                 independent warehouses with respect to given sets of
                 queries and updates.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data warehouse; independence; materialized view;
                 self-maintainability; view complement",
}

@Article{Grefen:2001:GTS,
  author =       "Paul Grefen and Jochem Vonk and Peter Apers",
  title =        "Global transaction support for workflow management
                 systems: from formal specification to practical
                 implementation",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "4",
  pages =        "316--333",
  month =        dec,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100056",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:59 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100316.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010004/10100316.pdf",
  abstract =     "In this paper, we present an approach to global
                 transaction management in workflow environments. The
                 transaction mechanism is based on the well-known notion
                  of compensation, but is extended to deal both with
                  arbitrary process structures, allowing cycles in
                  processes, and with safepoints, allowing partial
                  compensation of processes. We present a formal
                  specification of the
                 transaction model and transaction management algorithms
                 in set and graph theory, providing clear, unambiguous
                 transaction semantics. The specification is
                 straightforwardly mapped to a modular architecture, the
                 implementation of which is first applied in a testing
                 environment, then in the prototype of a commercial
                 workflow management system. The modular nature of the
                 resulting system allows easy distribution using
                 middleware technology. The path from abstract semantics
                 specification to concrete, real-world implementation of
                 a workflow transaction mechanism is thus covered in a
                 complete and coherent fashion. As such, this paper
                 provides a complete framework for the application of
                 well-founded transactional workflows.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "compensation; long-running transaction; transaction
                 management; workflow management",
}

@Article{Rahm:2001:SAA,
  author =       "Erhard Rahm and Philip A. Bernstein",
  title =        "A survey of approaches to automatic schema matching",
  journal =      j-VLDB-J,
  volume =       "10",
  number =       "4",
  pages =        "334--350",
  month =        dec,
  year =         "2001",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100057",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:50:59 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t1010004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/1010004/10100334.htm;
                 http://link.springer.de/link/service/journals/00778/papers/1010004/10100334.pdf",
  abstract =     "Schema matching is a basic problem in many database
                 application domains, such as data integration,
                 E-business, data warehousing, and semantic query
                 processing. In current implementations, schema matching
                 is typically performed manually, which has significant
                 limitations. On the other hand, previous research
                 papers have proposed many techniques to achieve a
                 partial automation of the match operation for specific
                 application domains. We present a taxonomy that covers
                 many of these existing approaches, and we describe the
                 approaches in some detail. In particular, we
                 distinguish between schema-level and instance-level,
                 element-level and structure-level, and language-based
                  and constraint-based matchers. Based on our
                  classification, we review some previous match
                  implementations, thereby indicating which part of
                  the solution space they cover. We intend our
                  taxonomy and
                 review of past work to be useful when comparing
                 different approaches to schema matching, when
                 developing a new match algorithm, and when implementing
                 a schema matching component.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "graph matching; machine learning; model management;
                 schema integration; schema matching",
}

@Article{Saltenis:2002:INR,
  author =       "Simonas {\v{S}}altenis and Christian S. Jensen",
  title =        "Indexing of now-relative spatio-bitemporal data",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "1",
  pages =        "1--16",
  month =        aug,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100058",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:00 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110001.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011001/20110001.pdf",
  abstract =     "Real-world entities are inherently spatially and
                 temporally referenced, and database applications
                 increasingly exploit databases that record the past,
                 present, and anticipated future locations of entities,
                 e.g., the residences of customers obtained by the
                 geo-coding of addresses. Indices that efficiently
                 support queries on the spatio-temporal extents of such
                 entities are needed. However, past indexing research
                 has progressed in largely separate spatial and temporal
                 streams. Adding time dimensions to spatial indices, as
                 if time were a spatial dimension, neither supports nor
                 exploits the special properties of time. On the other
                 hand, temporal indices are generally not amenable to
                 extension with spatial dimensions. This paper proposes
                 the first efficient and versatile index for a general
                 class of spatio-temporal data: the discretely changing
                 spatial aspect of an object may be a point or may have
                 an extent; both transaction time and valid time are
                 supported, and a generalized notion of the current
                 time, {\em now}, is accommodated for both temporal
                 dimensions. The index is based on the R$^*$-tree and
                 provides means of prioritizing space versus time, which
                 enables it to adapt to spatially and temporally
                 restrictive queries. Performance experiments are
                 reported that evaluate pertinent aspects of the
                 index.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access method; bitemporal data; multidimensional
                 indexing; R-tree; spatio-temporal data; transaction
                 time; valid time",
}

@Article{Rafiei:2002:ERS,
  author =       "Davood Rafiei and Alberto O. Mendelzon",
  title =        "Efficient retrieval of similar shapes",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "1",
  pages =        "17--27",
  month =        aug,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780100059",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:00 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110017.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011001/20110017.pdf",
  abstract =     "We propose an indexing technique for the fast
                 retrieval of objects in 2D images based on similarity
                 between their boundary shapes. Our technique is robust
                 in the presence of noise and supports several important
                 notions of similarity including optimal matches
                 irrespective of variations in orientation and/or
                 position. Our method can also handle size-invariant
                 matches using a normalization technique, although
                 optimality is not guaranteed here. We implemented our
                 method and performed experiments on real (hand-written
                 digits) data. Our experimental results showed the
                 superiority of our method compared to search based on
                 sequential scanning, which is the only obvious
                 competitor. The performance gain of our method
                 increases with any increase in the number or the size
                 of shapes.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Fourier descriptors; image databases; shape retrieval;
                 similarity queries; similarity retrieval",
}

@Article{Navarro:2002:SMS,
  author =       "Gonzalo Navarro",
  title =        "Searching in metric spaces by spatial approximation",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "1",
  pages =        "28--46",
  month =        aug,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780200060",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:00 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110028.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011001/20110028.pdf",
  abstract =     "We propose a new data structure to search in metric
                 spaces. A {\em metric space\/} is formed by a
                 collection of objects and a {\em distance function\/}
                 defined among them which satisfies the triangle
                 inequality. The goal is, given a set of objects and a
                 query, retrieve those objects close enough to the
                 query. The complexity measure is the number of
                 distances computed to achieve this goal. Our data
                 structure, called {\em sa-tree\/} (``spatial
                 approximation tree''), is based on approaching the
                 searched objects spatially, that is, getting closer and
                 closer to them, rather than the classic
                 divide-and-conquer approach of other data structures.
                 We analyze our method and show that the number of
                 distance evaluations to search among $n$ objects is
                 sublinear. We show experimentally that the {\em
                 sa-tree\/} is the best existing technique when the
                 metric space is hard to search or the query has low
                 selectivity. These are the most important unsolved
                 cases in real applications. As a practical advantage,
                 our data structure is one of the few that does not need
                 to tune parameters, which makes it appealing for use by
                 non-experts.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "multimedia databases; similarity or proximity search;
                 spatial and multidimensional search; spatial
                 approximation tree",
}

@Article{Mihaila:2002:LAD,
  author =       "George A. Mihaila and Louiqa Raschid and Anthony
                 Tomasic",
  title =        "Locating and accessing data repositories with
                 {WebSemantics}",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "1",
  pages =        "47--57",
  month =        aug,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780200061",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:00 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110047.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011001/20110047.pdf",
  abstract =     "Many collections of scientific data in particular
                 disciplines are available today on the World Wide Web.
                 Most of these data sources are compliant with some
                 standard for interoperable access. In addition, sources
                 may support a common semantics, i.e., a shared meaning
                 for the data types and their domains. However, sharing
                 data among a global community of users is still
                  difficult for the following reasons: (i) data
                 providers need a mechanism for describing and
                 publishing available sources of data; (ii) data
                 administrators need a mechanism for discovering the
                 location of published sources and obtaining metadata
                 from these sources; and (iii) users need a mechanism
                 for browsing and selecting sources. This paper
                 describes a system, WebSemantics, that accomplishes the
                 above tasks. We describe an architecture for the
                 publication and discovery of scientific data sources,
                 which is an extension of the World Wide Web
                 architecture and protocols. We support catalogs
                 containing metadata about data sources for some
                 application domain. We define a language for
                 discovering sources and querying their metadata. We
                 then describe the WebSemantics prototype.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data discovery; data integration; mediators; query
                 languages; World Wide Web; XML",
}

@Article{Ferrari:2002:ASD,
  author =       "E. Ferrari and N. R. Adam and V. Atluri and E. Bertino
                 and U. Capuozzo",
  title =        "An authorization system for digital libraries",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "1",
  pages =        "58--67",
  month =        aug,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780200063",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:00 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110058.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011001/20110058.pdf",
  abstract =     "Digital Libraries (DLs) introduce several challenging
                 requirements with respect to the formulation,
                 specification, and enforcement of adequate data
                 protection policies. Unlike conventional database
                 environments, a DL environment typically is
                 characterized by a dynamic subject population, often
                 making accesses from remote locations, and by an
                 extraordinarily large amount of multimedia information,
                 stored in a variety of formats. Moreover, in a DL
                 environment, access policies are often specified based
                 on subject qualifications and characteristics, rather
                 than subject identity. Traditional authorization models
                 are not adequate to meet access control requirements of
                 DLs. In this paper, we present a {\em Digital Library
                 Authorization System\/} (DLAS). DLAS employs a
                 content-based authorization model, called a {\em
                 Digital Library Authorization Model\/} (DLAM) which was
                 proposed in previous work [1].",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access control; credentials; digital libraries",
}

@Article{Marathe:2002:QPT,
  author =       "Arunprasad P. Marathe and Kenneth Salem",
  title =        "Query processing techniques for arrays",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "1",
  pages =        "68--91",
  month =        aug,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780200062",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:00 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011001/20110068.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011001/20110068.pdf",
  abstract =     "Arrays are a common and important class of data. At
                 present, database systems do not provide adequate array
                 support: arrays can neither be easily defined nor
                 conveniently manipulated. Further, array manipulations
                 are not optimized. This paper describes a language
                 called the {\em Array Manipulation Language\/} (AML),
                 for expressing array manipulations, and a collection of
                 optimization techniques for AML expressions. In the AML
                 framework for array manipulation, arbitrary
                 externally-defined functions can be applied to arrays
                 in a structured manner. AML can be adapted to different
                 application domains by choosing appropriate external
                 function definitions. This paper concentrates on arrays
                 occurring in databases of digital images such as
                 satellite or medical images. AML queries can be treated
                 declaratively and subjected to rewrite optimizations.
                 Rewriting minimizes the number of applications of
                 potentially costly external functions required to
                 compute a query result. AML queries can also be
                 optimized for space. Query results are generated a
                 piece at a time by pipelined execution plans, and the
                 amount of memory required by a plan depends on the
                 order in which pieces are generated. An optimizer can
                 consider generating the pieces of the query result in a
                 variety of orders, and can efficiently choose orders
                 that require less space. An AML-based prototype array
                 database system called {\em ArrayDB\/} has been built,
                 and it is used to show the effectiveness of these
                 optimization techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "array manipulation language; array query optimization;
                 declarative query language; memory-usage optimization;
                 pipelined evaluation; user-defined functions",
}

@Article{Sakurai:2002:SIH,
  author =       "Yasushi Sakurai and Masatoshi Yoshikawa and Shunsuke
                 Uemura and Haruhiko Kojima",
  title =        "Spatial indexing of high-dimensional data based on
                 relative approximation",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "2",
  pages =        "93--108",
  month =        oct,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0066-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:01 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011002/20110093.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011002/20110093.pdf",
  abstract =     "We propose a novel index structure, the A-tree
                 (approximation tree), for similarity searches in
                 high-dimensional data. The basic idea of the A-tree is
                  the introduction of virtual bounding rectangles
                  (VBRs), which contain and approximate minimum
                  bounding rectangles (MBRs) or data objects.
                 VBRs can be represented quite compactly and thus affect
                 the tree configuration both quantitatively and
                 qualitatively. First, since tree nodes can contain a
                 large number of VBR entries, fanout becomes large,
                 which increases search speed. More importantly, we have
                 a free hand in arranging MBRs and VBRs in the tree
                 nodes. Each A-tree node contains an MBR and its
                 children VBRs. Therefore, by fetching an A-tree node,
                 we can obtain information on the exact position of a
                 parent MBR and the approximate position of its
                 children. We have performed experiments using both
                 synthetic and real data sets. For the real data sets,
                 the A-tree outperforms the SR-tree and the VA-file in
                 all dimensionalities up to 64 dimensions, which is the
                 highest dimension in our experiments. Additionally, we
                 propose a cost model for the A-tree. We verify the
                 validity of the cost model for synthetic and real data
                 sets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "high-dimensional data; relative approximation;
                 similarity search",
}

@Article{Hjaltason:2002:SCP,
  author =       "Gisli R. Hjaltason and Hanan Samet",
  title =        "Speeding up construction of {PMR} quadtree-based
                 spatial indexes",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "2",
  pages =        "109--137",
  month =        oct,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0067-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:01 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011002/20110109.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011002/20110109.pdf",
  abstract =     "Spatial indexes, such as those based on the quadtree,
                 are important in spatial databases for efficient
                 execution of queries involving spatial constraints,
                 especially when the queries involve spatial joins. In
                 this paper we present a number of techniques for
                 speeding up the construction of quadtree-based spatial
                 indexes, specifically the PMR quadtree, which can index
                 arbitrary spatial data. We assume a quadtree
                 implementation using the ``linear quadtree'', a
                 disk-resident representation that stores objects
                 contained in the leaf nodes of the quadtree in a linear
                 index (e.g., a B-tree) ordered based on a space-filling
                 curve. We present two complementary techniques: an
                 improved insertion algorithm and a bulk-loading method.
                 The bulk-loading method can be extended to handle
                 bulk-insertions into an existing PMR quadtree. We make
                 some analytical observations about the I/O cost and CPU
                 cost of our PMR quadtree bulk-loading algorithm, and
                 conduct an extensive empirical study of the techniques
                 presented in the paper. Our techniques are found to
                 yield significant speedup compared to traditional
                 quadtree building methods, even when the size of a main
                 memory buffer is very small compared to the size of the
                 resulting quadtrees.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "bulk-loading; I/O; spatial indexing",
}
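
Annotation: the ``linear quadtree'' representation the abstract
assumes can be sketched compactly. Each leaf cell is named by a
Morton (Z-order) key, and the keys are kept in a sorted sequence that
stands in for the B-tree. A hedged Python sketch, with bulk loading
reduced to its essence: one sort followed by a sequential build.

    import bisect

    def morton(x, y, bits=16):
        # interleave the bits of x and y into one space-filling-curve key
        key = 0
        for i in range(bits):
            key |= ((x >> i) & 1) << (2 * i)
            key |= ((y >> i) & 1) << (2 * i + 1)
        return key

    class LinearQuadtree:
        def __init__(self):
            self.keys, self.objs = [], []     # stand-in for a B-tree
        def insert(self, x, y, obj):          # one-at-a-time insertion
            k = morton(x, y)
            i = bisect.bisect_left(self.keys, k)
            self.keys.insert(i, k)
            self.objs.insert(i, obj)

    def bulk_load(items):
        # items: iterable of (x, y, obj); sorting once by Morton key
        # and building sequentially is the essence of bulk loading
        t = LinearQuadtree()
        pairs = sorted(((morton(x, y), o) for x, y, o in items),
                       key=lambda p: p[0])
        for k, obj in pairs:
            t.keys.append(k)
            t.objs.append(obj)
        return t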

@Article{Nanopoulos:2002:ESS,
  author =       "Alexandros Nanopoulos and Yannis Manolopoulos",
  title =        "Efficient similarity search for market basket data",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "2",
  pages =        "138--152",
  month =        oct,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0068-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:01 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011002/20110138.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011002/20110138.pdf",
  abstract =     "Several organizations have developed very large market
                 basket databases for the maintenance of customer
                 transactions. New applications, e.g., Web
                 recommendation systems, present the requirement for
                 processing similarity queries in market basket
                 databases. In this paper, we propose a novel scheme for
                 similarity search queries in basket data. We develop a
                 new representation method, which, in contrast to
                 existing approaches, is proven to provide correct
                 results. New algorithms are proposed for the processing
                 of similarity queries. Extensive experimental results,
                 for a variety of factors, illustrate the superiority of
                 the proposed scheme over the state-of-the-art method.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data mining; market basket data; nearest-neighbor;
                 similarity search",
}
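
Annotation: to pin the query semantics down, a similarity query over
basket data asks for the transactions most similar to a query basket
under a set-based measure such as the Jaccard coefficient. The
exhaustive scan below (plain Python, illustrative only) is the
baseline that an indexed scheme like the paper's is designed to beat.

    def jaccard(a, b):
        a, b = set(a), set(b)
        return len(a & b) / len(a | b) if a | b else 1.0

    def k_most_similar(query, baskets, k=3):
        # brute-force k-nearest-neighbour scan over all transactions
        return sorted(baskets,
                      key=lambda t: jaccard(query, t),
                      reverse=True)[:k]

    k_most_similar({"milk", "bread"},
                   [{"milk"}, {"beer", "bread"}, {"milk", "bread", "eggs"}])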

@Article{Feng:2002:TMM,
  author =       "Ling Feng and Jeffrey Xu Yu and Hongjun Lu and Jiawei
                 Han",
  title =        "A template model for multidimensional
                 inter-transactional association rules",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "2",
  pages =        "153--175",
  month =        oct,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0069-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:01 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011002.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011002/20110153.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011002/20110153.pdf",
  abstract =     "Multidimensional inter-transactional association rules
                 extend the traditional association rules to describe
                 more general associations among items with multiple
                 properties across transactions. ``{\em After McDonald
                 and Burger King open branches, KFC will open a branch
                 two months later and one mile away}'' is an example of
                 such rules. Since the number of potential
                 inter-transactional association rules tends to be
                 extremely large, mining inter-transactional
                 associations poses greater challenges for efficient
                 processing than mining traditional intra-transactional
                 associations. In order to make such association rule
                 mining truly practical and computationally tractable,
                 in this study we present a template model to help users
                 declare the interesting {\em multidimensional
                 inter-transactional associations\/} to be mined. With
                 the guidance of templates, several optimization
                 techniques, i.e., joining, converging, and speeding,
                 are devised to speed up the discovery of
                 inter-transactional association rules. We show, through
                 a series of experiments on both synthetic and real-life
                 data sets, that these optimization techniques can yield
                 significant performance benefits.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "intra-transactional/inter-transactional association
                 rules; multidimensional context; template model",
}
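
Annotation: a one-dimensional instance makes the rule class concrete.
With transactions keyed by position (say, day), an inter-transactional
rule relates items at different offsets, and a template fixes which
offsets the miner must consider. A minimal sketch with hypothetical
names; the paper's template model covers multiple dimensions and the
joining/converging/speeding optimizations, none of which appear here.

    def support_confidence(tx_by_pos, a, b, d):
        # rule: item a at position p implies item b at position p + d
        antecedent = hits = 0
        for pos, items in tx_by_pos.items():
            if a in items:
                antecedent += 1
                if b in tx_by_pos.get(pos + d, ()):
                    hits += 1
        support = hits / len(tx_by_pos) if tx_by_pos else 0.0
        confidence = hits / antecedent if antecedent else 0.0
        return support, confidence

    # template: only offsets 0..2 are declared interesting, so only
    # three rule shapes per item pair are enumerated rather than an
    # unbounded set
    tx = {1: {"mcdonalds"}, 3: {"kfc"}, 4: {"burgerking"}, 6: {"kfc"}}
    candidates = [("mcdonalds", "kfc", d) for d in range(3)]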

@Article{Apers:2002:E,
  author =       "Peter Apers and Stefano Ceri and Richard Snodgrass",
  title =        "Editorial",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "3",
  pages =        "177--178",
  month =        nov,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0075-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:02 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Special issue VLDB best papers 2001.",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110177.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011003/20110177.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{An:2002:EPT,
  author =       "Ning An and Sudhanva Gurumurthi and Anand
                 Sivasubramaniam and Narayanan Vijaykrishnan and Mahmut
                 Kandemir and Mary Jane Irwin",
  title =        "Energy-performance trade-offs for spatial access
                 methods on memory-resident data",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "3",
  pages =        "179--197",
  month =        nov,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0073-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:02 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Special issue VLDB best papers 2001.",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110179.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011003/20110179.pdf",
  abstract =     "The proliferation of mobile and pervasive computing
                 devices has brought energy constraints into the
                 limelight. Energy-conscious design is important at all
                 levels of system architecture, and the software has a
                 key role to play in conserving battery energy on these
                 devices. With the increasing popularity of spatial
                 database applications, and their anticipated deployment
                 on mobile devices (such as road atlases and GPS-based
                 applications), it is critical to examine the energy
                 implications of spatial data storage and access methods
                 for memory-resident datasets. While there has been
                 extensive prior research on spatial access methods on
                 resource-rich environments, this is, perhaps, the first
                 study to examine their suitability for
                 resource-constrained environments. Using a detailed
                 cycle-accurate energy estimation framework and four
                 different datasets, this paper examines the pros and
                 cons of three previously proposed spatial indexing
                 alternatives from both the energy and performance
                 angles. Specifically, the Quadtree, Packed R-tree, and
                 Buddy-Tree structures are evaluated and compared with a
                 brute-force approach that does not use an index. The
                 results show that there are both performance and energy
                 trade-offs between the indexing schemes for the
                 different queries. The nature of the query also plays
                 an important role in determining the energy-performance
                 trade-offs. Further, technological trends and
                 architectural enhancements are influencing factors on
                 the relative behavior of the index structures. The
                 work required by a query has a bearing on how and
                 where (on a mobile client and/or on a server) it
                 should be performed for performance and energy
                 savings. The
                 results from this study will be beneficial for the
                 design and implementation of embedded spatial
                 databases, accelerating their deployment on numerous
                 mobile devices.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "energy optimization; multidimensional indexing;
                 resource-constrained computing; spatial data",
}

@Article{Ailamaki:2002:DPL,
  author =       "Anastassia Ailamaki and David J. DeWitt and Mark D.
                 Hill",
  title =        "Data page layouts for relational databases on deep
                 memory hierarchies",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "3",
  pages =        "198--215",
  month =        nov,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0074-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:02 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Special issue VLDB best papers 2001.",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110198.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011003/20110198.pdf",
  abstract =     "Relational database systems have traditionally
                 optimized for I/O performance and organized records
                 sequentially on disk pages using the N-ary Storage
                 Model (NSM) (a.k.a. slotted pages). Recent research,
                 however, indicates that cache utilization and
                 performance are becoming increasingly important on
                 modern platforms. In this paper, we first demonstrate
                 that in-page data placement is the key to high cache
                 performance and that NSM exhibits low cache utilization
                 on modern platforms. Next, we propose a new data
                 organization model called PAX (Partition Attributes
                 Across), which significantly improves cache performance
                 by grouping together all values of each attribute
                 within each page. Because PAX only affects layout
                 inside the pages, it incurs no storage penalty and does
                 not affect I/O behavior. According to our experimental
                 results (which were obtained without using any indices
                 on the participating relations), when compared to NSM:
                 (a) PAX exhibits superior cache and memory bandwidth
                 utilization, saving at least 75\% of NSM's stall time
                 due to data cache accesses; (b) range selection queries
                 and updates on memory-resident relations execute
                 17--25\% faster; and (c) TPC-H queries involving I/O
                 execute 11--48\% faster. Finally, we show that PAX
                 performs well
                 across different memory system designs.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "cache-conscious database systems; disk page layout;
                 relational data placement",
}
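
Annotation: PAX is simple enough to sketch in a few lines. Within a
page, values are grouped per attribute into ``minipages'', so a scan
of one attribute touches contiguous memory while tuple reconstruction
stays page-local. A toy Python model (lists stand in for minipages;
slot directories and storage details are not modeled).

    class PaxPage:
        def __init__(self, schema):
            self.schema = schema
            self.minipages = {a: [] for a in schema}   # one per attribute
        def insert(self, record):
            for attr, value in zip(self.schema, record):
                self.minipages[attr].append(value)
        def scan(self, attr):
            # cache-friendly: reads one contiguous minipage, not whole rows
            return self.minipages[attr]
        def reconstruct(self, rid):
            # tuple reconstruction never leaves the page
            return tuple(self.minipages[a][rid] for a in self.schema)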

@Article{Chirkova:2002:FPV,
  author =       "Rada Chirkova and Alon Y. Halevy and Dan Suciu",
  title =        "A formal perspective on the view selection problem",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "3",
  pages =        "216--237",
  month =        nov,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0070-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:02 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Special issue VLDB best papers 2001.",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110216.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011003/20110216.pdf",
  abstract =     "The view selection problem is to choose a set of views
                 to materialize over a database schema, such that the
                 cost of evaluating a set of workload queries is
                 minimized and such that the views fit into a
                 prespecified storage constraint. The two main
                 applications of the view selection problem are
                 materializing views in a database to speed up query
                 processing, and selecting views to materialize in a
                 data warehouse to answer decision support queries. In
                 addition, view selection is a core problem for
                 intelligent data placement over a wide-area network for
                 data integration applications and data management for
                 ubiquitous computing. We describe several fundamental
                 results concerning the view selection problem. We
                 consider the problem for views and workloads that
                 consist of equality-selection, project and join
                 queries, and show that the complexity of the problem
                 depends crucially on the quality of the estimates that
                 a query optimizer has on the size of the views it is
                 considering to materialize. When a query optimizer has
                 good estimates of the sizes of the views, we show a
                 somewhat surprising result, namely, that an optimal
                 choice of views may involve a number of views that is
                 exponential in the size of the database schema. On the
                 other hand, when an optimizer uses standard estimation
                 heuristics, we show that the number of necessary views
                 and the expression size of each view are polynomially
                 bounded.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "materialized views; view selection",
}
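
Annotation: the problem statement itself is easy to operationalize,
which helps when reading the complexity results: given candidate
views with size estimates, a storage budget, and a workload cost
function, pick the subset that fits and minimizes cost. An exhaustive
sketch (exponential, as the paper's lower bounds warn; all names are
illustrative placeholders, not the paper's algorithm):

    from itertools import combinations

    def select_views(candidates, size, budget, workload_cost):
        # candidates: list of view names; size(v): estimated size of v;
        # workload_cost(S): workload cost given materialized view set S
        best, best_cost = frozenset(), workload_cost(frozenset())
        for r in range(1, len(candidates) + 1):
            for subset in combinations(candidates, r):
                if sum(size(v) for v in subset) <= budget:
                    cost = workload_cost(frozenset(subset))
                    if cost < best_cost:
                        best, best_cost = frozenset(subset), cost
        return best, best_cost

The paper's point is precisely how the quality of the size()
estimates changes the shape (and the cardinality) of the optimum.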

@Article{Aguilera:2002:VLS,
  author =       "Vincent Aguilera and Sophie Cluet and Tova Milo and
                 Pierangelo Veltri and Dan Vodislav",
  title =        "Views in a large-scale {XML} repository",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "3",
  pages =        "238--255",
  month =        nov,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0065-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:02 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Special issue VLDB best papers 2001.",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110238.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011003/20110238.pdf",
  abstract =     "We are interested in defining and querying views in a
                 huge and highly heterogeneous XML repository (Web
                 scale). In this context, view definitions are very
                 large, involving lots of sources, and there is no
                 apparent limitation to their size. This raises
                 interesting problems that we address in the paper: (i)
                 how to distribute views over several machines without
                 having a negative impact on the query translation
                 process; (ii) how to quickly select the relevant part
                 of a view given a query; (iii) how to minimize the cost
                 of communicating potentially large queries to the
                 machines where they will be evaluated. The solution
                 that we propose is based on a simple view definition
                 language that allows for automatic generation of views.
                 The language maps paths in the view abstract DTD to
                 paths in the concrete source DTDs. It enables a
                 distributed implementation of the view system that is
                 scalable both in terms of data and load. In particular,
                 the query translation algorithm is shown to have a good
                 (linear) complexity.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "query evaluation; semantic integration; views;
                 warehouse; XML",
}
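
Annotation: the view definition language maps paths of the view's
abstract DTD to paths in concrete source DTDs, which makes query
translation a lookup plus rewrite. A deliberately small sketch; the
mapping table, source names, and paths below are invented for
illustration.

    VIEW_MAP = {
        # abstract view path -> (source id, concrete path) alternatives
        "catalog/book/title": [("src1", "store/item/name"),
                               ("src2", "library/doc/title")],
        "catalog/book/price": [("src1", "store/item/cost")],
    }

    def translate(view_path):
        # selecting the relevant part of the view is a dictionary probe,
        # so it scales with the query, not with the size of the view
        return VIEW_MAP.get(view_path, [])

    def partition(view_map, n_machines):
        # distribute view entries across machines by hashing the path
        parts = [{} for _ in range(n_machines)]
        for path, srcs in view_map.items():
            parts[hash(path) % n_machines][path] = srcs
        return parts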

@Article{Hunt:2002:DIL,
  author =       "Ela Hunt and Malcolm P. Atkinson and Robert W.
                 Irving",
  title =        "Database indexing for large {DNA} and protein sequence
                 collections",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "3",
  pages =        "256--271",
  month =        nov,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s007780200064",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:02 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011003.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "Special issue VLDB best papers 2001.",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011003/20110256.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011003/20110256.pdf",
  abstract =     "Our aim is to develop new database technologies for
                 the approximate matching of unstructured string data
                 using indexes. We explore the potential of the suffix
                 tree data structure in this context. We present a new
                 method of building suffix trees, allowing us to build
                 trees in excess of RAM size, which has hitherto not
                 been possible. We show that this method performs in
                 practice as well as the $O(n)$ method of Ukkonen [70].
                 Using this method we build indexes for 200 Mb of
                 protein and 300 Mbp of DNA, whose disk-image exceeds
                 the available RAM. We show experimentally that suffix
                 trees can be effectively used in approximate string
                 matching with biological data. For a range of query
                 lengths and error bounds the suffix tree reduces the
                 size of the unoptimised $O(mn)$ dynamic programming
                 calculation required in the evaluation of string
                 similarity, and the gain from indexing increases with
                 index size. In the indexes we built this reduction is
                 significant, and less than 0.3\% of the expected matrix
                 is evaluated. We detail the requirements for further
                 database and algorithmic research to support efficient
                 use of large suffix indexes in biological
                 applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "approximate matching; biological sequence; database
                 index; suffix tree",
}
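
Annotation: a scaled-down illustration of index-assisted string
search. A suffix array (used here in place of the paper's
disk-resident suffix trees) lets exact seeds of a query be located by
binary search, and it is around such seeds that the $O(mn)$
dynamic-programming matrix can be restricted. Naive construction,
fine at sketch scale only.

    def suffix_array(s):
        # O(n^2 log n) toy construction; the paper's contribution is
        # precisely building such indexes larger than RAM
        return sorted(range(len(s)), key=lambda i: s[i:])

    def occurrences(s, sa, p):
        # binary search for the block of suffixes starting with p
        def bound(pred):
            lo, hi = 0, len(sa)
            while lo < hi:
                mid = (lo + hi) // 2
                prefix = s[sa[mid]:sa[mid] + len(p)]
                if pred(prefix):
                    lo = mid + 1
                else:
                    hi = mid
            return lo
        start = bound(lambda pre: pre < p)
        end = bound(lambda pre: pre <= p)
        return sorted(sa[start:end])

    s = "banana"
    assert occurrences(s, suffix_array(s), "an") == [1, 3]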

@Article{Halevy:2002:GE,
  author =       "Alon Y. Halevy",
  title =        "Guest Editorial",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "4",
  pages =        "273--273",
  month =        dec,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0082-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:03 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110273.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011004/20110273.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Jagadish:2002:TNX,
  author =       "H. V. Jagadish and S. Al-Khalifa and A. Chapman and L.
                 V. S. Lakshmanan and A. Nierman and S. Paparizos and
                 J. M. Patel and D. Srivastava and N. Wiwatwattana and
                 Y. Wu and C. Yu",
  title =        "{TIMBER}: a native {XML} database",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "4",
  pages =        "274--291",
  month =        dec,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0081-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:03 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110274.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011004/20110274.pdf",
  abstract =     "This paper describes the overall design and
                 architecture of the Timber XML database system
                 currently being implemented at the University of
                 Michigan. The system is based upon a bulk algebra for
                 manipulating trees, and natively stores XML. New access
                 methods have been developed to evaluate queries in the
                 XML context, and new cost estimation and query
                 optimization techniques have also been developed. We
                 present performance numbers to support some of our
                 design decisions. We believe that the key intellectual
                 contribution of this system is a comprehensive
                 set-at-a-time query processing ability in a native XML
                 store, with all the standard components of relational
                 query processing, including algebraic rewriting and a
                 cost-based optimizer.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "algebra; document management; hierarchical; query
                 processing; semi-structured",
}

@Article{Fiebig:2002:ANX,
  author =       "T. Fiebig and S. Helmer and C.-C. Kanne and G.
                 Moerkotte and J. Neumann and R. Schiele and T. Westmann",
  title =        "Anatomy of a native {XML} base management system",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "4",
  pages =        "292--314",
  month =        dec,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0080-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:03 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110292.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011004/20110292.pdf",
  abstract =     "Several alternatives to manage large XML document
                 collections exist, ranging from file systems over
                 relational or other database systems to specifically
                 tailored XML base management systems. In this paper we
                 give a tour of Natix, a database management system
                 designed from scratch for storing and processing XML
                 data. Contrary to the common belief that management of
                 XML data is just another application for traditional
                 databases like relational systems, we illustrate how
                 almost every component in a database system is affected
                 in terms of adequacy and performance. We show how to
                 design and optimize areas such as storage, transaction
                 management --- comprising recovery and multi-user
                 synchronization --- as well as query processing for
                 XML.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database; XML",
}

@Article{Amer-Yahia:2002:TPQ,
  author =       "S. Amer-Yahia and S. Cho and L. V. S. Lakshmanan and
                 D. Srivastava",
  title =        "Tree pattern query minimization",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "4",
  pages =        "315--331",
  month =        dec,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0076-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:03 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110315.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011004/20110315.pdf",
  abstract =     "Tree patterns form a natural basis to query
                 tree-structured data such as XML and LDAP. To improve
                 the efficiency of tree pattern matching, it is
                 essential to quickly identify and eliminate redundant
                 nodes in the pattern. In this paper, we study tree
                 pattern minimization both in the absence and in the
                 presence of integrity constraints (ICs) on the
                 underlying tree-structured database. In the absence of
                 ICs, we develop a polynomial-time query minimization
                 algorithm called CIM, whose efficiency stems from two
                 key properties: (i) a node cannot be redundant unless
                 its children are; and (ii) the order of elimination of
                 redundant nodes is immaterial. When ICs are considered
                 for minimization, we develop a technique for query
                 minimization based on three fundamental operations:
                 augmentation (an adaptation of the well-known chase
                 procedure), minimization (based on homomorphism
                 techniques), and reduction. We show the surprising
                 result that the algorithm, referred to as ACIM,
                 obtained by first augmenting the tree pattern using
                 ICs, and then applying CIM, always finds the unique
                 minimal equivalent query. While ACIM is polynomial
                 time, it can be expensive in practice because of its
                 inherent non-locality. We then present a fast
                 algorithm, CDM, that identifies and eliminates local
                 redundancies due to ICs, based on propagating
                 ``information labels'' up the tree pattern. CDM can be
                 applied prior to ACIM for improving the minimization
                 efficiency. We complement our analytical results with
                 an experimental study that shows the effectiveness of
                 our tree pattern minimization techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "query minimization; tree patterns; XML",
}
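
Annotation: property (i) in the abstract, that a node cannot be
redundant unless its children are, is what makes bottom-up
minimization work. The sketch below handles only the simplest
redundancy class, identical sibling subtrees in a child-axis-only
pattern, which is safe because pattern embeddings need not be
injective; CIM and ACIM go much further (descendant axes, integrity
constraints), and nothing here should be read as their algorithm.

    def minimize(node):
        # node: (label, [children]); minimize children first
        # (bottom-up), then drop duplicate sibling subtrees
        label, children = node
        kids, seen, out = [minimize(c) for c in children], set(), []
        for c in kids:
            if repr(c) not in seen:
                seen.add(repr(c))
                out.append(c)
        return (label, out)

    # a[b][b] collapses to a[b], child axis only:
    pattern = ("a", [("b", []), ("b", [])])
    assert minimize(pattern) == ("a", [("b", [])])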

@Article{Chien:2002:ESM,
  author =       "S.-Y. Chien and V. J. Tsotras and C. Zaniolo",
  title =        "Efficient schemes for managing multiversion {XML}
                 documents",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "4",
  pages =        "332--353",
  month =        dec,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0079-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:03 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110332.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011004/20110332.pdf",
  abstract =     "Multiversion support for XML documents is needed in
                 many critical applications, such as software
                 configuration control, cooperative authoring, web
                 information warehouses, and ``e-permanence'' of web
                 documents. In this paper, we introduce efficient and
                 robust techniques for: (i) storing and retrieving; (ii)
                 viewing and exchanging; and (iii) querying multiversion
                 XML documents. We first discuss the limitations of
                 traditional version control methods, such as RCS and
                 SCCS, and then propose novel techniques that overcome
                 their limitations. Initially, we focus on the problem
                 of managing secondary storage efficiently, and
                 introduce an {\em edit-based\/} versioning scheme that
                 enhances RCS with an effective clustering policy based
                 on the concept of page-usefulness. The new scheme
                 drastically improves version retrieval at the expense
                 of a small (linear) space overhead. However, the
                 edit-based approach falls short of achieving objectives
                 (ii) and (iii). Therefore, we introduce and investigate
                 a second scheme, which is reference-based and preserves
                 the structure of the original document. In the
                 reference-based approach, a multiversion document can
                 be represented as yet another XML document, which can
                 be easily exchanged and viewed on the web; furthermore,
                 simple queries are also expressed and supported well
                 under this representation. To achieve objective (i), we
                 extend the page-usefulness clustering technique to the
                 reference-based scheme. After characterizing the
                 asymptotic behavior of the new techniques proposed, the
                 paper presents the results of an experimental study
                 evaluating and comparing their performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "historical queries; temporal clustering; temporal
                 indexing; version management; XML database",
}

@Article{Chan:2002:EFX,
  author =       "C.-Y. Chan and P. Felber and M. Garofalakis and R.
                 Rastogi",
  title =        "Efficient filtering of {XML} documents with {XPath}
                 expressions",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "4",
  pages =        "354--379",
  month =        dec,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0077-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:03 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110354.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011004/20110354.pdf",
  abstract =     "The publish/subscribe paradigm is a popular model for
                 allowing publishers (i.e., data generators) to
                 selectively disseminate data to a large number of
                 widely dispersed subscribers (i.e., data consumers) who
                 have registered their interest in specific information
                 items. Early publish/subscribe systems have typically
                 relied on simple subscription mechanisms, such as
                 keyword or ``bag of words'' matching, or simple
                 comparison predicates on attribute values. The
                 emergence of XML as a standard for information exchange
                 on the Internet has led to an increased interest in
                 using more expressive subscription mechanisms (e.g.,
                 based on XPath expressions) that exploit both the
                 structure and the content of published XML documents.
                 Given the increased complexity of these new
                 data-filtering mechanisms, the problem of effectively
                 identifying the subscription profiles that match an
                 incoming XML document poses a difficult and important
                 research challenge. In this paper, we propose a novel
                 index structure, termed XTrie, that supports the
                 efficient filtering of XML documents based on XPath
                 expressions. Our XTrie index structure offers several
                 novel features that, we believe, make it especially
                 attractive for large-scale publish/subscribe systems.
                 First, XTrie is designed to support effective filtering
                 based on complex XPath expressions (as opposed to
                 simple, single-path specifications). Second, our XTrie
                 structure and algorithms are designed to support both
                 ordered and unordered matching of XML data. Third, by
                 indexing on sequences of elements organized in a trie
                 structure and using a sophisticated matching algorithm,
                 XTrie is able to both reduce the number of unnecessary
                 index probes as well as avoid redundant matchings,
                 thereby providing extremely efficient filtering. Our
                 experimental results over a wide range of XML document
                 and XPath expression workloads demonstrate that our
                 XTrie index structure outperforms earlier approaches by
                 wide margins.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data dissemination; document filtering; index
                 structure; XML; XPath",
}
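
Annotation: a severely simplified cousin of XTrie conveys the
indexing idea. Linear, child-axis-only XPaths are decomposed into
element sequences and stored in a trie, so one walk over a document's
element path probes all subscriptions at once instead of testing them
one by one. The real XTrie indexes substrings of tree-shaped XPaths
and handles ordered/unordered matching; none of that is attempted
here.

    class TrieNode:
        def __init__(self):
            self.kids, self.subs = {}, []

    class PathFilter:
        def __init__(self):
            self.root = TrieNode()
        def subscribe(self, sub_id, xpath):
            # index e.g. "/a/b/c" as the element sequence a, b, c
            node = self.root
            for tag in xpath.strip("/").split("/"):
                node = node.kids.setdefault(tag, TrieNode())
            node.subs.append(sub_id)
        def matches(self, doc_path):
            # subscriptions whose path is a prefix of this document path
            node, out = self.root, []
            for tag in doc_path.strip("/").split("/"):
                node = node.kids.get(tag)
                if node is None:
                    break
                out.extend(node.subs)
            return out

    f = PathFilter()
    f.subscribe(1, "/a/b")
    f.subscribe(2, "/a/c")
    assert f.matches("/a/b/d") == [1]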

@Article{Ives:2002:XQE,
  author =       "Zachary G. Ives and A. Y. Halevy and D. S. Weld",
  title =        "An {XML} query engine for network-bound data",
  journal =      j-VLDB-J,
  volume =       "11",
  number =       "4",
  pages =        "380--402",
  month =        dec,
  year =         "2002",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0078-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:03 MDT 2008",
  bibsource =    "http://link.springer.de/link/service/journals/00778/tocs/t2011004.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/2011004/20110380.htm;
                 http://link.springer.de/link/service/journals/00778/papers/2011004/20110380.pdf",
  abstract =     "XML has become the lingua franca for data exchange and
                 integration across administrative and enterprise
                 boundaries. Nearly all data providers are adding XML
                 import or export capabilities, and standard XML Schemas
                 and DTDs are being promoted for all types of data
                 sharing. The ubiquity of XML has removed one of the
                 major obstacles to integrating data from widely
                 disparate sources --- namely, the heterogeneity of data
                 formats. However, general-purpose integration of data
                 across the wide area also requires a query processor
                 that can query data sources on demand, receive streamed
                 XML data from them, and combine and restructure the
                 data into new XML output --- while providing good
                 performance for both batch-oriented and ad hoc,
                 interactive queries. This is the goal of the Tukwila
                 data integration system, the first system that focuses
                 on network-bound, dynamic XML data sources. In contrast
                 to previous approaches, which must read, parse, and
                 often store entire XML objects before querying them,
                 Tukwila can return query results even as the data is
                 streaming into the system. Tukwila is built with a new
                 system architecture that extends adaptive query
                 processing and relational-engine techniques into the
                 XML realm, as facilitated by a pair of operators that
                 incrementally evaluate a query's input path expressions
                 as data is read. In this paper, we describe the Tukwila
                 architecture and its novel aspects, and we
                 experimentally demonstrate that Tukwila provides better
                 overall query performance and faster initial answers
                 than existing systems, and has excellent scalability.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data integration; data streams; query processing; web
                 and databases; XML",
}
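
Annotation: the operator pair the abstract alludes to evaluates path
expressions incrementally as events stream in, which is what lets the
engine emit answers before a document has fully arrived. A toy
generator over SAX-like events, restricted to a root-anchored
child-axis path; the event shapes and names are assumptions, not
Tukwila's interfaces.

    def stream_match(events, path):
        # events: ("start", tag), ("end", tag), ("text", data)
        steps = path.strip("/").split("/")
        stack = []
        for kind, data in events:
            if kind == "start":
                stack.append(data)
            elif kind == "end":
                stack.pop()
            elif kind == "text" and stack == steps:
                yield data   # emitted immediately, mid-stream

    evts = [("start", "a"), ("start", "b"), ("text", "first"),
            ("end", "b"), ("start", "b"), ("text", "second"),
            ("end", "b"), ("end", "a")]
    assert list(stream_match(evts, "/a/b")) == ["first", "second"]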

@Article{Ozsu:2003:NPA,
  author =       "M. Tamer {\"O}zsu",
  title =        "New partnership with {ACM} and update on the journal",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "1",
  pages =        "1--1",
  month =        may,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0089-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:05 MDT 2008",
  bibsource =    "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120001.htm;
                 http://link.springer.de/link/service/journals/00778/papers/3012001/30120001.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sheth:2003:CRK,
  author =       "A. Sheth and S. Thacker and S. Patel",
  title =        "Complex relationships and knowledge discovery support
                 in the {InfoQuilt} system",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "1",
  pages =        "2--27",
  month =        may,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0071-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:05 MDT 2008",
  bibsource =    "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120002.htm;
                 http://link.springer.de/link/service/journals/00778/papers/3012001/30120002.pdf",
  abstract =     "Support for semantic content is becoming more common
                 in Web-accessible information systems. We see this
                 support emerging with the use of ontologies and
                 machine-readable, annotated documents. The practice of
                 domain modeling coupled with the extraction of
                 domain-specific, contextually relevant metadata also
                 supports the use of semantics. These advancements
                 enable knowledge discovery approaches that define
                 complex relationships between data that is autonomously
                 collected and managed. The InfoQuilt (One of the
                 incarnations of the InfoQuilt system, as applied to
                 geographic information as part of the NSF Digital
                 Library II initiative, is the ADEPT-UGA system [Ade].
                 This research was funded in part by National Science
                 Foundation grant IIS-9817432.) system supports one such
                 knowledge discovery approach. This paper presents
                 (parts of) the InfoQuilt system with the focus on its
                 use for modeling and utilizing complex semantic
                 inter-domain relationships to enable human-assisted
                 knowledge discovery over Web-accessible heterogeneous
                 data. This includes the specification and execution of
                 Information Scapes (IScapes), a semantically rich
                 information request and correlation mechanism.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Verykios:2003:BDM,
  author =       "V. S. Verykios and G. V. Moustakides and M. G.
                 Elfeky",
  title =        "A {Bayesian} decision model for cost optimal record
                 matching",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "1",
  pages =        "28--40",
  month =        may,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0072-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:05 MDT 2008",
  bibsource =    "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120028.htm;
                 http://link.springer.de/link/service/journals/00778/papers/3012001/30120028.pdf",
  abstract =     "In an error-free system with perfectly clean data, the
                 construction of a global view of the data consists of
                 linking --- in relational terms, joining --- two or
                 more tables on their key fields. Unfortunately, most of
                 the time, these data are neither carefully controlled
                 for quality nor necessarily defined commonly across
                 different data sources. As a result, the creation of
                 such a global data view resorts to approximate joins.
                 In this paper, an optimal solution is proposed for the
                 matching or the linking of database record pairs in the
                 presence of inconsistencies, errors or missing values
                 in the data. Existing models for record matching rely
                 on decision rules that minimize the probability of
                 error, that is the probability that a sample (a
                 measurement vector) is assigned to the wrong class. In
                 practice though, minimizing the probability of error is
                 not the best criterion to design a decision rule
                 because the misclassifications of different samples may
                 have different consequences. In this paper we present a
                 decision model that minimizes the cost of making a
                 decision. In particular: (a) we present a decision
                 rule; (b) we prove that this rule is optimal with
                 respect to the cost of a decision; and (c) we compute
                 the probabilities of the two types of errors (Type I
                 and Type II) that are incurred when this rule is
                 applied. We
                 also present a closed form decision model for a certain
                 class of record comparison pairs along with an example,
                 and results from comparing the proposed cost-based
                 model to the error-based model, for large record
                 comparison spaces.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "cost optimal statistical model; data cleaning; record
                 linkage",
}
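
Annotation: the minimum-cost rule itself fits in a few lines once the
comparison vector has been reduced to a likelihood ratio. The
threshold below is the standard two-class Bayes-risk threshold; the
cost keys and the 'M'/'U' encoding are illustrative, and the paper's
contribution is the optimality proof and the Type I/II error analysis
around such a rule, not this snippet.

    def decide(lr, p_match, cost):
        # lr: P(x | matched) / P(x | unmatched) for comparison vector x
        # cost[(d, t)]: cost of deciding d when the truth is t
        p_unmatch = 1.0 - p_match
        threshold = ((cost[("M", "U")] - cost[("U", "U")]) * p_unmatch) \
                  / ((cost[("U", "M")] - cost[("M", "M")]) * p_match)
        return "M" if lr >= threshold else "U"

    # with 0/1 costs this degenerates to the minimum-error rule
    zero_one = {("M", "U"): 1, ("U", "M"): 1,
                ("M", "M"): 0, ("U", "U"): 0}
    assert decide(lr=3.0, p_match=0.5, cost=zero_one) == "M"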

@Article{Cui:2003:LTG,
  author =       "Y. Cui and J. Widom",
  title =        "Lineage tracing for general data warehouse
                 transformations",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "1",
  pages =        "41--58",
  month =        may,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0083-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:05 MDT 2008",
  bibsource =    "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120041.htm;
                 http://link.springer.de/link/service/journals/00778/papers/3012001/30120041.pdf",
  abstract =     "Data warehousing systems integrate information from
                 operational data sources into a central repository to
                 enable analysis and mining of the integrated
                 information. During the integration process, source
                 data typically undergoes a series of {\em
                 transformations}, which may vary from simple algebraic
                 operations or aggregations to complex ``data
                 cleansing'' procedures. In a warehousing environment,
                 the {\em data lineage\/} problem is that of tracing
                 warehouse data items back to the original source items
                 from which they were derived. We formally define the
                 lineage tracing problem in the presence of general data
                 warehouse transformations, and we present algorithms
                 for lineage tracing in this environment. Our tracing
                 procedures take advantage of known structure or
                 properties of transformations when present, but also
                 work in the absence of such information. Our results
                 can be used as the basis for a lineage tracing tool in
                 a general warehousing setting, and also can guide the
                 design of data warehouses that enable efficient lineage
                 tracing.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data lineage; data warehouse; inverse; lineage
                 tracing; transformation",
}
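
Annotation: mechanically, lineage tracing composes per-transformation
tracing procedures in reverse. A minimal sketch, assuming each
transformation is registered with a trace function from one output
item to its contributing inputs (the ``known structure'' case the
abstract mentions; the paper also covers transformations without such
hints, which this sketch does not).

    class Transform:
        def __init__(self, apply_fn, trace_fn):
            self.apply = apply_fn    # inputs -> outputs
            self.trace = trace_fn    # one output item -> input items

    def lineage(transforms, item):
        # walk the transformation sequence backwards from a warehouse item
        items = [item]
        for t in reversed(transforms):
            items = [src for it in items for src in t.trace(it)]
        return items

    rows = [("a", 1), ("a", 2), ("b", 3)]
    sum_by_key = Transform(
        apply_fn=lambda rs: [(k, sum(v for kk, v in rs if kk == k))
                             for k in sorted({kk for kk, _ in rs})],
        # trace closes over the source rows for brevity
        trace_fn=lambda out: [r for r in rows if r[0] == out[0]])
    assert lineage([sum_by_key], ("a", 3)) == [("a", 1), ("a", 2)]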

@Article{Medjahed:2003:BBI,
  author =       "B. Medjahed and B. Benatallah and A. Bouguettaya and
                 A. H. H. Ngu and A. K. Elmagarmid",
  title =        "Business-to-business interactions: issues and enabling
                 technologies",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "1",
  pages =        "59--85",
  month =        may,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0087-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:05 MDT 2008",
  bibsource =    "http://link.springer-ny.com/link/service/journals/UNKNOWN/tocs/t3012001.htm;
                 http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.de/link/service/journals/00778/bibs/3012001/30120059.htm;
                 http://link.springer.de/link/service/journals/00778/papers/3012001/30120059.pdf",
  abstract =     "Business-to-Business (B2B) technologies pre-date the
                 Web. They have existed for at least as long as the
                 Internet. B2B applications were among the first to take
                 advantage of advances in computer networking. The
                 Electronic Data Interchange (EDI) business standard is
                 an illustration of such an early adoption of the
                 advances in computer networking. The ubiquity and the
                 affordability of the Web has made it possible for the
                 masses of businesses to automate their B2B
                 interactions. However, several issues related to scale,
                 content exchange, autonomy, heterogeneity, and other
                 issues still need to be addressed. In this paper, we
                 survey the main techniques, systems, products, and
                 standards for B2B interactions. We propose a set of
                 criteria for assessing the different B2B interaction
                 techniques, standards, and products.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "B2B interactions; components; e-commerce; EDI; Web
                 services; workflows; XML",
}

@Article{Bernstein:2003:GE,
  author =       "Philip A. Bernstein and Yannis Ioannidis and Raghu
                 Ramakrishnan",
  title =        "Guest editorial",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "2",
  pages =        "87--88",
  month =        aug,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0092-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:06 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ramamurthy:2003:CFM,
  author =       "Ravishankar Ramamurthy and David J. DeWitt and Qi
                 Su",
  title =        "A case for fractured mirrors",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "2",
  pages =        "89--101",
  month =        aug,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0093-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:06 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The decomposition storage model (DSM) vertically
                 partitions all attributes of a table and has excellent
                 I/O behavior when the number of attributes accessed by
                 a query is small. It also has a better cache footprint
                 than the standard storage model (NSM) used by most
                 database systems. However, DSM incurs a high cost in
                 reconstructing the original tuple from its partitions.
                 We first revisit some of the performance problems
                 associated with DSM, suggest a simple indexing
                 strategy, and compare different reconstruction
                 algorithms. Then we propose a new mirroring scheme,
                 termed fractured mirrors, using both NSM and DSM
                 models. This scheme combines the best aspects of both
                 models, along with the added benefit of mirroring to
                 better serve an ad hoc query workload. A prototype
                 system has been built using the Shore storage manager,
                 and performance is evaluated using queries from the
                 TPC-H workload.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data placement; disk mirroring; vertical
                 partitioning",
}

@Article{Chan:2003:RTE,
  author =       "Chee-Yong Chan and Minos Garofalakis and Rajeev
                 Rastogi",
  title =        "{RE}-tree: an efficient index structure for regular
                 expressions",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "2",
  pages =        "102--119",
  month =        aug,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0094-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:06 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Due to their expressive power, regular expressions
                 (REs) are quickly becoming an integral part of language
                 specifications for several important application
                 scenarios. Many of these applications have to manage
                 huge databases of RE specifications and need to provide
                 an effective matching mechanism that, given an input
                 string, quickly identifies the REs in the database that
                 match it. In this paper, we propose the RE-tree, a
                 novel index structure for large databases of RE
                 specifications. Given an input query string, the
                 RE-tree speeds up the retrieval of matching REs by
                 focusing the search and comparing the input string with
                 only a small fraction of REs in the database. Even
                 though the RE-tree is similar in spirit to other
                 tree-based structures that have been proposed for
                 indexing multidimensional data, RE indexing is
                 significantly more challenging since REs typically
                 represent infinite sets of strings with no well-defined
                 notion of spatial locality. To address these new
                 challenges, our RE-tree index structure relies on novel
                 measures for comparing the relative sizes of infinite
                 regular languages. We also propose innovative solutions
                 for the various RE-tree operations including the
                 effective splitting of RE-tree nodes and computing a
                 `tight' bounding RE for a collection of REs. Finally,
                 we demonstrate how sampling-based approximation
                 algorithms can be used to significantly speed up the
                 performance of RE-tree operations. Preliminary
                 experimental results with moderately large synthetic
                 data sets indicate that the RE-tree is effective in
                 pruning the search space and easily outperforms naive
                 sequential search approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "index structure; regular expressions; sampling-based
                 approximations; size measures",
}

@Article{Abadi:2003:ANM,
  author =       "Daniel J. Abadi and Don Carney and Ugur
                 {\c{C}}etintemel and Mitch Cherniack and Christian
                 Convey and Sangdon Lee and Michael Stonebraker and
                 Nesime Tatbul and Stan Zdonik",
  title =        "{Aurora}: a new model and architecture for data stream
                 management",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "2",
  pages =        "120--139",
  month =        aug,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0095-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:06 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper describes the basic processing model and
                 architecture of Aurora, a new system to manage data
                 streams for monitoring applications. Monitoring
                 applications differ substantially from conventional
                 business data processing. The fact that a software
                 system must process and react to continual inputs from
                 many sources (e.g., sensors) rather than from human
                 operators requires one to rethink the fundamental
                 architecture of a DBMS for this application area. In
                 this paper, we present Aurora, a new DBMS currently
                 under construction at Brandeis University, Brown
                 University, and M.I.T. We first provide an overview of
                 the basic Aurora model and architecture and then
                 describe in detail a stream-oriented set of
                 operators.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "continuous queries; data stream management; database
                 triggers; quality-of-service; real-time systems",
}

@Article{Chandrasekaran:2003:PSS,
  author =       "Sirish Chandrasekaran and Michael J. Franklin",
  title =        "{PSoup}: a system for streaming queries over streaming
                 data",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "2",
  pages =        "140--156",
  month =        aug,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0096-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:06 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Recent work on querying data streams has focused on
                 systems where newly arriving data is processed and
                 continuously streamed to the user in real time. In many
                 emerging applications, however, ad hoc queries and/or
                 intermittent connectivity also require the processing
                 of data that arrives prior to query submission or
                 during a period of disconnection. For such
                 applications, we have developed PSoup, a system that
                 combines the processing of ad hoc and continuous
                 queries by treating data and queries symmetrically,
                 allowing new queries to be applied to old data and new
                 data to be applied to old queries. PSoup also supports
                 intermittent connectivity by separating the computation
                 of query results from the delivery of those results.
                 PSoup builds on adaptive query-processing techniques
                 developed in the Telegraph project at UC Berkeley. In
                 this paper, we describe PSoup and present experiments
                 that demonstrate the effectiveness of our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "disconnected operation; query-data duality; stream
                 query processing",
}

@Article{Agrawal:2003:WRD,
  author =       "Rakesh Agrawal and Peter J. Haas and Jerry Kiernan",
  title =        "Watermarking relational data: framework, algorithms
                 and analysis",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "2",
  pages =        "157--169",
  month =        aug,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0097-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:06 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We enunciate the need for watermarking database
                 relations to deter data piracy, identify the
                 characteristics of relational data that pose unique
                 challenges for watermarking, and delineate desirable
                 properties of a watermarking system for relational
                 data. We then present an effective watermarking
                 technique geared for relational data. This technique
                 ensures that some bit positions of some of the
                 attributes of some of the tuples contain specific
                 values. The specific bit locations and values are
                 algorithmically determined under the control of a
                 secret key known only to the owner of the data. This
                 bit pattern constitutes the watermark. Only if one has
                 access to the secret key can the watermark be detected
                 with high probability. Detecting the watermark requires
                 access neither to the original data nor to the
                 watermark,
                 and the watermark can be easily and efficiently
                 maintained in the presence of insertions, updates, and
                 deletions. Our analysis shows that the proposed
                 technique is robust against various forms of malicious
                 attacks as well as benign updates to the data. Using an
                 implementation running on DB2, we also show that the
                 algorithms perform well enough to be used in real-world
                 applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database; information hiding; steganography;
                 watermarking",
}

@Article{Chakrabarti:2003:FAT,
  author =       "Soumen Chakrabarti and Shourya Roy and Mahesh V.
                 Soundalgekar",
  title =        "Fast and accurate text classification via multiple
                 linear discriminant projections",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "2",
  pages =        "170--185",
  month =        aug,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0098-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:06 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Support vector machines (SVMs) have shown superb
                 performance for text classification tasks. They are
                 accurate, robust, and quick to apply to test instances.
                 Their only potential drawback is their training time
                 and memory requirement. For $n$ training instances held
                 in memory, the best-known SVM implementations take time
                 proportional to $n^a$, where $a$ is typically between
                 1.8 and 2.1. SVMs have been trained on data sets with
                 several thousand instances, but Web directories today
                 contain millions of instances that are valuable for
                 mapping billions of Web pages into Yahoo!-like
                 directories. We present SIMPL, a nearly linear-time
                 classification algorithm that mimics the strengths of
                 SVMs while avoiding the training bottleneck. It uses
                 Fisher's linear discriminant, a classical tool from
                 statistical pattern recognition, to project training
                 instances to a carefully selected low-dimensional
                 subspace before inducing a decision tree on the
                 projected instances. SIMPL uses efficient sequential
                 scans and sorts and is comparable in speed and memory
                 scalability to widely used naive Bayes (NB)
                 classifiers, but it beats NB accuracy decisively. It
                 not only approaches and sometimes exceeds SVM accuracy,
                 but also beats the running time of a popular SVM
                 implementation by orders of magnitude. While describing
                 SIMPL, we make a detailed experimental comparison of
                 SVM-generated discriminants with Fisher's
                 discriminants, and we also report on an analysis of the
                 cache performance of a popular SVM implementation. Our
                 analysis shows that SIMPL has the potential to be the
                 method of choice for practitioners who want the
                 accuracy of SVMs and the simplicity and speed of naive
                 Bayes classifiers.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "discriminative learning; linear discriminants; text
                 classification",
}

@Article{Fung:2003:CDV,
  author =       "Chi-Wai Fung and Kamalakar Karlapalem and Qing Li",
  title =        "Cost-driven vertical class partitioning for methods in
                 object oriented databases",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "3",
  pages =        "187--210",
  month =        oct,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0084-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:07 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In object-oriented databases (OODBs), a method
                 encapsulated in a class typically accesses a few, but
                 not all, of the instance variables defined in the
                 class. It
                 may thus be preferable to vertically partition the
                 class for reducing irrelevant data (instance variables)
                 accessed by the methods. Our prior work has shown that
                 vertical class partitioning can result in a substantial
                 decrease in the total number of disk accesses incurred
                 for executing a set of applications, but coming up with
                 an optimal vertical class partitioning scheme is a hard
                 problem. In this paper, we present two algorithms for
                 deriving optimal and near-optimal vertical class
                 partitioning schemes. The cost-driven algorithm
                 provides the optimal vertical class partitioning
                 schemes by enumerating, exhaustively, all the schemes
                 and calculating the number of disk accesses required to
                 execute a given set of applications. For this, a cost
                 model for executing a set of methods in an OODB system
                 is developed. Since exhaustive enumeration is costly
                 and only works for classes with a small number of
                 instance variables, a hill-climbing heuristic algorithm
                 (HCHA) is developed, which takes the solution provided
                 by the affinity-based algorithm and improves it,
                 thereby further reducing the total number of disk
                 accesses incurred. We show that the HCHA algorithm
                 provides a reasonable near-optimal vertical class
                 partitioning scheme for executing a given set of
                 applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "affinity-based; analytical cost model; cost-driven;
                 hill-climbing heuristic algorithm; method-induced;
                 object-oriented databases; vertical class
                 partitioning",
}

@Article{Li:2003:CCA,
  author =       "Chen Li",
  title =        "Computing complete answers to queries in the presence
                 of limited access patterns",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "3",
  pages =        "211--227",
  month =        oct,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-002-0085-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:07 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In data applications such as information integration,
                 there can be limited access patterns to relations,
                 i.e., binding patterns require values to be specified
                 for certain attributes in order to retrieve data from a
                 relation. As a consequence, we cannot retrieve all
                 tuples from these relations. In this article we study
                 the problem of computing the {\em complete\/} answer to
                 a query, i.e., the answer that could be computed if all
                 the tuples could be retrieved. A query is {\em
                 stable\/} if for any instance of the relations in the
                 query, its complete answer can be computed using the
                 access patterns permitted by the relations. We study
                 the problem of testing stability of various classes of
                 queries, including conjunctive queries, unions of
                 conjunctive queries, and conjunctive queries with
                 arithmetic comparisons. We give algorithms and
                 complexity results for these classes of queries. We
                 show that stability of datalog programs is undecidable,
                 and give a sufficient condition for stability of
                 datalog queries. Finally, we study data-dependent
                 computability of the complete answer to a nonstable
                 query, and propose a decision tree for guiding the
                 process to compute the complete answer.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "complete answers to queries; limited access patterns
                 to relations; query stability",
}

@Article{Chua:2003:IBA,
  author =       "Cecil Eng H. Chua and Roger H. L. Chiang and Ee-Peng
                 Lim",
  title =        "Instance-based attribute identification in database
                 integration",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "3",
  pages =        "228--243",
  month =        oct,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0088-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:07 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Most research on attribute identification in database
                 integration has focused on integrating attributes using
                 schema and summary information derived from the
                 attribute values. No research has attempted to fully
                 explore the use of attribute values to perform
                 attribute identification. We propose an attribute
                 identification method that employs schema and summary
                 instance information as well as properties of
                 attributes derived from their instances. Unlike other
                 attribute identification methods that match only single
                 attributes, our method matches attribute groups for
                 integration. Because our attribute identification
                 method fully explores data instances, it can identify
                 corresponding attributes to be integrated even when
                 schema information is misleading. Three experiments
                 were performed to validate our attribute identification
                 method. In the first experiment, the heuristic rules
                 derived for attribute classification were evaluated on
                 119 attributes from nine public domain data sets. The
                 second was a controlled experiment validating the
                 robustness of the proposed attribute identification
                 method by introducing erroneous data. The third
                 experiment evaluated the proposed attribute
                 identification method on five data sets extracted from
                 online music stores. The results demonstrated the
                 viability of the proposed method.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "attribute identification; database integration;
                 measures of association",
}

@Article{Helmer:2003:PSF,
  author =       "Sven Helmer and Guido Moerkotte",
  title =        "A performance study of four index structures for
                 set-valued attributes of low cardinality",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "3",
  pages =        "244--261",
  month =        oct,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0106-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:07 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The efficient retrieval of data items on set-valued
                 attributes is an important research topic that has
                 attracted little attention so far. We studied and
                 modified four index structures (sequential signature
                 files, signature trees, extendible signature hashing,
                 and inverted files) for a fast retrieval of sets with
                 low cardinality. We compared the index structures by
                 implementing them and subjecting them to extensive
                 experiments, investigating the influence of query set
                 size, database size, domain size, and data distribution
                 (synthetic and real). The results of the experiments
                 clearly indicate that inverted files exhibit the best
                 overall behavior of all tested index structures.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access methods; database management systems; index
                 structures; physical design; set-valued attributes",
}

@Article{Yang:2003:ICM,
  author =       "Jun Yang and Jennifer Widom",
  title =        "Incremental computation and maintenance of temporal
                 aggregates",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "3",
  pages =        "262--283",
  month =        oct,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0107-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:07 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We consider the problems of computing aggregation
                 queries in temporal databases and of maintaining
                 materialized temporal aggregate views efficiently. The
                 latter problem is particularly challenging since a
                 single data update can cause aggregate results to
                 change over the entire time line. We introduce a new
                 index structure called the {\em SB-tree}, which
                 incorporates features from both {\em segment-trees\/}
                 and {\em B-trees}. SB-trees support fast lookup of
                 aggregate results based on time and can be maintained
                 efficiently when the data change. We extend the basic
                 SB-tree index to handle {\em cumulative\/} (also called
                 {\em moving-window\/}) aggregates, considering
                 separately the cases when the window size is or is not
                 fixed in advance. For materialized aggregate views in a
                 temporal database or warehouse, we propose building and
                 maintaining SB-tree indices instead of the views
                 themselves.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access methods; aggregation; B-tree; segment tree;
                 temporal database; view maintenance",
}

@Article{Atluri:2003:GE,
  author =       "Vijay Atluri and Anupam Joshi and Yelena Yesha",
  title =        "Guest editorial",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "4",
  pages =        "285--285",
  month =        nov,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0109-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Maedche:2003:MMD,
  author =       "A. Maedche and B. Motik and L. Stojanovic",
  title =        "Managing multiple and distributed ontologies on the
                 {Semantic Web}",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "4",
  pages =        "286--302",
  month =        nov,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0102-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In traditional software systems, significant attention
                 is devoted to keeping modules well separated and
                 coherent with respect to functionality, thus ensuring
                 that changes in the system are localized to a handful
                 of modules. Reuse is seen as the key method in reaching
                 that goal. Ontology-based systems on the Semantic Web
                 are just a special class of software systems, so the
                 same principles apply. In this article, we present an
                 integrated framework for managing multiple and
                 distributed ontologies on the Semantic Web. It is based
                 on a representation model for ontologies that trades
                 off between expressivity and tractability. In our
                 framework, we provide features for reusing existing
                 ontologies and for evolving them while retaining
                 consistency. The approach is implemented within KAON,
                 the Karlsruhe Ontology and Semantic Web tool suite.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "multiple and distributed ontologies; ontology
                 evolution",
}

@Article{Doan:2003:LMO,
  author =       "AnHai Doan and Jayant Madhavan and Robin Dhamankar and
                 Pedro Domingos and Alon Halevy",
  title =        "Learning to match ontologies on the {Semantic Web}",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "4",
  pages =        "303--319",
  month =        nov,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0104-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "On the Semantic Web, data will inevitably come from
                 many different ontologies, and information processing
                 across ontologies is not possible without knowing the
                 semantic mappings between them. Manually finding such
                 mappings is tedious, error-prone, and clearly not
                 possible on the Web scale. Hence the development of
                 tools to assist in the ontology mapping process is
                 crucial to the success of the Semantic Web. We describe
                 {\em GLUE}, a system that employs machine learning
                 techniques to find such mappings. Given two ontologies,
                 for each concept in one ontology {\em GLUE\/} finds the
                 most similar concept in the other ontology. We give
                 well-founded probabilistic definitions to several
                 practical similarity measures and show that {\em
                 GLUE\/} can work with all of them. Another key feature
                 of {\em GLUE\/} is that it uses multiple learning
                 strategies, each of which effectively exploits a
                 different type of information either in the data
                 instances or in the taxonomic structure of the
                 ontologies. To further
                 improve matching accuracy, we extend {\em GLUE\/} to
                 incorporate common-sense knowledge and domain
                 constraints into the matching process. Our approach is
                 thus distinguished in that it works with a variety of
                 well-defined similarity notions and that it efficiently
                 incorporates multiple types of knowledge. We describe a
                 set of experiments on several real-world domains and
                 show that {\em GLUE\/} proposes highly accurate
                 semantic mappings. Finally, we extend {\em GLUE\/} to
                 find complex mappings between ontologies and describe
                 experiments that show the promise of the approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "machine learning; ontology matching; relaxation
                 labeling; Semantic Web",
}

@Article{Halkidi:2003:TOW,
  author =       "Maria Halkidi and Benjamin Nguyen and Iraklis Varlamis
                 and Michalis Vazirgiannis",
  title =        "{THESUS}: Organizing {Web} document collections based
                 on link semantics",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "4",
  pages =        "320--332",
  month =        nov,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0100-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The requirements for effective search and management
                 of the WWW are stronger than ever. Currently, Web
                 documents are classified based on their content, not
                 taking into account the fact that these documents are
                 connected to each other by links. We claim that a
                 page's classification is enriched by the detection of
                 its incoming links' semantics. This would enable
                 effective browsing and enhance the validity of search
                 results in the WWW context. Another aspect that is
                 underaddressed and strictly related to the tasks of
                 browsing and searching is the similarity of documents
                 at the semantic level. The above observations lead us
                 to the adoption of a hierarchy of concepts (ontology)
                 and a thesaurus to exploit links and provide a better
                 characterization of Web documents. The enhancement of
                 document characterization makes operations such as
                 clustering and labeling very interesting. To this end,
                 we devised a system called THESUS. The system deals
                 with an initial set of Web documents, extracts
                 keywords from all pages' incoming links, and converts
                 them to semantics by mapping them to a domain's
                 ontology. Then a clustering algorithm is applied to
                 discover groups of Web documents. The effectiveness of
                 the clustering process is based on the use of a novel
                 similarity measure between documents characterized by
                 sets of terms. Web documents are organized into
                 thematic subsets based on their semantics. The subsets
                 are then labeled, thereby enabling easier management
                 (browsing, searching, querying) of the Web. In this
                 article, we detail the process of this system and give
                 an experimental analysis of its results.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "document clustering; link analysis; link management;
                 semantics; similarity measure; World Wide Web",
}

@Article{Medjahed:2003:CWS,
  author =       "Brahim Medjahed and Athman Bouguettaya and Ahmed K.
                 Elmagarmid",
  title =        "Composing {Web} services on the {Semantic Web}",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "4",
  pages =        "333--351",
  month =        nov,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0101-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Service composition is gaining momentum as the
                 potential {\em silver bullet\/} for the envisioned {\em
                 Semantic Web}. It purports to take the Web to
                 unexplored efficiencies and provide a flexible approach
                 for promoting all types of activities in tomorrow's
                 Web. Applications expected to heavily take advantage of
                 Web service composition include B2B E-commerce and
                 E-government. To date, enabling composite services has
                 largely been an ad hoc, time-consuming, and error-prone
                 process involving repetitive low-level programming. In
                 this paper, we propose an {\em ontology\/}-based
                 framework for the automatic composition of Web
                 services. We present a technique to generate composite
                 services from high-level declarative descriptions. We
                 define formal safeguards for meaningful composition
                 through the use of {\em composability\/} rules. These
                 rules compare the {\em syntactic\/} and {\em
                 semantic\/} features of Web services to determine
                 whether two services are composable. We provide an
                 implementation using an E-government application
                 offering customized services to indigent citizens.
                 Finally, we present an exhaustive performance
                 experiment to assess the scalability of our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "ontology; Semantic Web; service composition; Web
                 services",
}

@Article{Fileto:2003:POW,
  author =       "Renato Fileto and Ling Liu and Calton Pu and Eduardo
                 Delgado Assad and Claudia Bauzer Medeiros",
  title =        "{POESIA}: an ontological workflow approach for
                 composing {Web} services in agriculture",
  journal =      j-VLDB-J,
  volume =       "12",
  number =       "4",
  pages =        "352--367",
  month =        nov,
  year =         "2003",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0103-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper describes the POESIA approach to systematic
                 composition of Web services. This pragmatic approach is
                 strongly centered on the use of domain-specific
                 multidimensional ontologies. Inspired by applications
                 needs and founded on ontologies, workflows, and
                 activity models, POESIA provides well-defined
                 operations (aggregation, specialization, and
                 instantiation) to support the composition of Web
                 services. POESIA complements current proposals for Web
                 services definition and composition by providing a
                 higher degree of abstraction with verifiable
                 consistency properties. We illustrate the POESIA
                 approach using a concrete application scenario in
                 agroenvironmental planning.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "composition of Web services; data integration;
                 ontologies; Semantic Web; semantics of data and
                 processes",
}

@Article{Jensen:2004:MDM,
  author =       "Christian S. Jensen and Augustas Kligys and Torben
                 Bach Pedersen and Igor Timko",
  title =        "Multidimensional data modeling for location-based
                 services",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "1",
  pages =        "1--21",
  month =        jan,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0091-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:09 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the recent and continuing advances in areas such
                 as wireless communications and positioning
                 technologies, mobile, location-based services are
                 becoming possible. Such services deliver
                 location-dependent content to their users. More
                 specifically, these services may capture the movements
                 and requests of their users in multidimensional
                 databases, i.e., data warehouses, and content delivery
                 may be based on the results of complex queries on these
                 data warehouses. Such queries aggregate detailed data
                 in order to find useful patterns, e.g., in the
                 interaction of a particular user with the services. The
                 application of multidimensional technology in this
                 context poses a range of new challenges. The specific
                 challenge addressed here concerns the provision of an
                 appropriate multidimensional data model. In particular,
                 the paper extends an existing multidimensional data
                 model and algebraic query language to accommodate
                 spatial values that exhibit partial containment
                 relationships instead of the total containment
                 relationships normally assumed in multidimensional data
                 models. Partial containment introduces imprecision in
                 aggregation paths. The paper proposes a method for
                 evaluating the imprecision of such paths. The paper
                 also offers transformations of dimension hierarchies
                 with partial containment relationships to simple
                 hierarchies, to which existing precomputation
                 techniques are applicable.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data modeling; location-based services;
                 multidimensional data; partial containment",
}

@Article{Zhang:2004:PMV,
  author =       "Xin Zhang and Lingli Ding and Elke A. Rundensteiner",
  title =        "Parallel multisource view maintenance",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "1",
  pages =        "22--48",
  month =        jan,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0086-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:09 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In a distributed environment, materialized views are
                 used to integrate data from different information
                 sources and then store them in some centralized
                 location. In order to maintain such materialized views,
                 maintenance queries need to be sent to information
                 sources by the data warehouse management system. Due to
                 the independence of the information sources and the
                 data warehouse, concurrency issues are raised between
                 the maintenance queries and the local update
                 transactions at each information source. Recent
                 solutions such as ECA and Strobe tackle such concurrent
                 maintenance, however with the requirement of quiescence
                 of the information sources. SWEEP and POSSE overcome
                 this limitation by decomposing the global maintenance
                 query into smaller subqueries to be sent to every
                 information source and then performing conflict
                 correction locally at the data warehouse. Note that all
                 these previous approaches handle the data updates {\em
                 one at a time}. Hence either some of the information
                 sources or the data warehouse is likely to be idle
                 during most of the maintenance process. In this paper,
                 we propose that a set of updates should be maintained
                 in parallel by several concurrent maintenance processes
                 so that both the information sources and the
                 warehouse would be utilized more fully throughout the
                 maintenance process. This parallelism should then
                 improve the overall maintenance performance. For this
                 we have developed a parallel view maintenance
                 algorithm, called PVM, that substantially improves upon
                 the performance of previous maintenance approaches by
                 handling a set of data updates at the same time. The
                 parallel handling of a set of updates is orthogonal to
                 the particular maintenance algorithm applied to the
                 handling of each individual update. In order to perform
                 parallel view maintenance, we have identified two
                 critical issues that must be overcome: (1) detecting
                 maintenance-concurrent data updates in a parallel mode
                 and (2) correcting the problem that the data warehouse
                 commit order may not correspond to the data warehouse
                 update processing order due to parallel maintenance
                 handling. In this work, we provide solutions to both
                 issues. For the former, we insert a middle-layer
                 timestamp assignment module for detecting
                 maintenance-concurrent data updates without requiring
                 any global clock synchronization. For the latter, we
                 introduce the negative counter concept to solve the
                 problem of variant orders of committing effects of data
                 updates to the data warehouse. We provide a proof of
                 the correctness of PVM that guarantees that our
                 strategy indeed generates the correct final data
                 warehouse state. We have implemented both SWEEP and PVM
                 in our EVE data warehousing system. Our performance
                 study demonstrates that a manyfold performance
                 improvement is achieved by PVM over SWEEP.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrent data updates; data warehousing; parallel
                 view maintenance; performance evaluation",
}

@Article{Hristidis:2004:AAA,
  author =       "Vagelis Hristidis and Yannis Papakonstantinou",
  title =        "Algorithms and applications for answering ranked
                 queries using ranked views",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "1",
  pages =        "49--70",
  month =        jan,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0099-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:09 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Ranked queries return the top objects of a database
                 according to a preference function. We present and
                 evaluate (experimentally and theoretically) a core
                 algorithm that answers ranked queries in an efficient
                 pipelined manner using materialized ranked views. We
                 use and extend the core algorithm in the described
                 PREFER and MERGE systems. PREFER precomputes a set of
                 materialized views that provide guaranteed query
                 performance. We present an algorithm that selects a
                 near optimal set of views under space constraints. We
                 also describe multiple optimizations and implementation
                 aspects of the downloadable version of PREFER. Then we
                 discuss MERGE, which operates at a metabroker and
                 answers ranked queries by retrieving a minimal number
                 of objects from sources that offer ranked queries. A
                 speculative version of the pipelining algorithm is
                 described.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "materialization; merge ranked views; ranked queries",
}

@Article{Khan:2004:REO,
  author =       "Latifur Khan and Dennis McLeod and Eduard Hovy",
  title =        "Retrieval effectiveness of an ontology-based model for
                 information selection",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "1",
  pages =        "71--85",
  month =        jan,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0105-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:09 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Technology in the field of digital media generates
                 huge amounts of nontextual information, audio, video,
                 and images, along with more familiar textual
                 information. The potential for exchange and retrieval
                 of information is vast and daunting. The key problem in
                 achieving efficient and user-friendly retrieval is the
                 development of a search mechanism to guarantee delivery
                 of minimal irrelevant information (high precision)
                 while ensuring relevant information is not overlooked
                 (high recall). The traditional solution employs
                 keyword-based search. The only documents retrieved are
                 those containing user-specified keywords. But many
                 documents convey desired semantic information without
                 containing these keywords. This limitation is
                 frequently addressed through query expansion mechanisms
                 based on the statistical co-occurrence of terms. Recall
                 is increased, but at the expense of deteriorating
                 precision. One can overcome this problem by indexing
                 documents according to context and meaning rather than
                 keywords, although this requires a method of converting
                 words to meanings and the creation of a meaning-based
                 index structure. We have solved the problem of an index
                 structure through the design and implementation of a
                 concept-based model using domain-dependent ontologies.
                 An ontology is a collection of concepts and their
                 interrelationships that provide an abstract view of an
                 application domain. With regard to converting words to
                 meaning, the key issue is to identify appropriate
                 concepts that both describe and identify documents as
                 well as language employed in user requests. This paper
                 describes an automatic mechanism for selecting these
                 concepts. An important novelty is a scalable
                 disambiguation algorithm that prunes irrelevant
                 concepts and allows relevant ones to associate with
                 documents and participate in query generation. We also
                 propose an automatic query expansion mechanism that
                 deals with user requests expressed in natural language.
                 This mechanism generates database queries with
                 appropriate and relevant expansion through knowledge
                 encoded in ontology form. Focusing on audio data, we
                 have constructed a demonstration prototype. We have
                 experimentally and analytically shown that our model,
                 compared to keyword search, achieves a significantly
                 higher degree of precision and recall. The techniques
                 employed can be applied to the problem of information
                 selection in all media types.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "audio; metadata; ontology; precision; recall; SQL",
}

@Article{Donderler:2004:RBS,
  author =       "Mehmet Emin D{\"o}nderler and {\"O}zg{\"u}r Ulusoy and
                 Ugur G{\"u}d{\"u}kbay",
  title =        "Rule-based spatiotemporal query processing for video
                 databases",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "1",
  pages =        "86--103",
  month =        jan,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0114-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:09 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In our earlier work, we proposed an architecture for a
                 Web-based video database management system (VDBMS)
                 providing an integrated support for spatiotemporal and
                 semantic queries. In this paper, we focus on the task
                 of spatiotemporal query processing and also propose an
                 SQL-like video query language that has the capability
                 to handle a broad range of spatiotemporal queries. The
                 language is rule-based in that it allows users to
                 express spatial conditions in terms of Prolog-type
                 predicates. Spatiotemporal query processing is carried
                 out in three main stages: query recognition, query
                 decomposition, and query execution.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "content-based retrieval; inference rules; multimedia
                 databases; spatiotemporal query processing; video
                 databases",
}

@Article{Yu:2004:QHD,
  author =       "Cui Yu and St{\'e}phane Bressan and Beng Chin Ooi and
                 Kian-Lee Tan",
  title =        "Querying high-dimensional data in single-dimensional
                 space",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "2",
  pages =        "105--119",
  month =        may,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-004-0121-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:10 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we propose a new tunable index scheme,
                 called iMinMax($\theta$), that maps points in
                 high-dimensional spaces to single-dimensional values
                 determined by their maximum or minimum values among all
                 dimensions. By varying the tuning ``knob'', $\theta$,
                 we can obtain different families of iMinMax structures
                 that are optimized for different distributions of data
                 sets. The transformed data can then be indexed using
                 existing single-dimensional indexing structures such as
                 the B$^+$-trees. Queries in the high-dimensional space
                 have to be transformed into queries in the
                 single-dimensional space and evaluated there. We
                 present efficient algorithms for evaluating window
                 queries as range queries on the single-dimensional
                 space. We conducted an extensive performance study to
                 evaluate the effectiveness of the proposed schemes. Our
                 results show that iMinMax($\theta$) outperforms
                 existing techniques, including the Pyramid scheme and
                 VA-file, by a wide margin. We then describe how iMinMax
                 could be used in approximate K-nearest neighbor (KNN)
                 search, and we present a comparative study against the
                 recently proposed iDistance, a specialized KNN indexing
                 method.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "edge; high-dimensional data; iMinMax($\theta$);
                 single-dimensional space; window and KNN queries",
}
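
%%% The iMinMax($\theta$) point-to-key mapping sketched in the abstract
%%% above is compact enough to write out.  The sketch below assumes
%%% points normalized to $[0,1]^d$ and uses a plain sorted list where
%%% the paper uses a B$^+$-tree; all names are ours, not the paper's:
%%%
%%%    import bisect
%%%
%%%    def iminmax_key(point, theta=0.0):
%%%        """Map a d-dimensional point to a one-dimensional key."""
%%%        d_min, x_min = min(enumerate(point), key=lambda p: p[1])
%%%        d_max, x_max = max(enumerate(point), key=lambda p: p[1])
%%%        # The tuning knob theta decides whether the minimum or the
%%%        # maximum edge "wins" for this point.
%%%        if x_min + theta < 1.0 - x_max:
%%%            return d_min + x_min   # key lands in partition d_min
%%%        return d_max + x_max       # key lands in partition d_max
%%%
%%%    class IMinMaxIndex:
%%%        """Toy one-dimensional index over transformed keys."""
%%%        def __init__(self, theta=0.0):
%%%            self.theta, self.entries = theta, []  # (key, point) pairs
%%%        def insert(self, point):
%%%            bisect.insort(self.entries,
%%%                          (iminmax_key(point, self.theta), point))
%%%
%%%    if __name__ == "__main__":
%%%        idx = IMinMaxIndex(theta=0.1)
%%%        for p in [(0.9, 0.2, 0.4), (0.1, 0.5, 0.3)]:
%%%            idx.insert(p)
%%%        print(idx.entries)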

@Article{Dori:2004:VVS,
  author =       "Dov Dori",
  title =        "{ViSWeb} --- the {Visual Semantic Web}: unifying human
                 and machine knowledge representations with
                 {Object-Process Methodology}",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "2",
  pages =        "120--147",
  month =        may,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-004-0120-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:10 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The Visual Semantic Web (ViSWeb) is a new paradigm for
                 enhancing the current Semantic Web technology. Based on
                 Object-Process Methodology (OPM), which enables
                 modeling of systems in a single graphic and textual
                 model, ViSWeb provides for representation of knowledge
                 over the Web in a unified way that caters to human
                 perceptions while also being machine processable. The
                 advantages of the ViSWeb approach include equivalent
                 graphic-text knowledge representation, visual
                 navigability, semantic sentence interpretation,
                 specification of system dynamics, and complexity
                 management. Arguing against the claim that humans and
                 machines need to look at different knowledge
                 representation formats, the principles and basics of
                 various graphic and textual knowledge representations
                 are presented and examined as candidates for ViSWeb
                 foundation. Since OPM is shown to be most adequate for
                 the task, ViSWeb is developed as an OPM-based layer on
                 top of XML/RDF/OWL to express knowledge visually and in
                 natural language. Both the graphic and the textual
                 representations are strictly equivalent. Being
                 intuitive yet formal, they are not only understandable
                 to humans but are also amenable to computer processing.
                 The ability to use such bimodal knowledge
                 representation is potentially a major step forward in
                 the evolution of the Semantic Web.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "conceptual graphs; knowledge representation;
                 object-process methodology; Semantic Web; Visual
                 Semantic Web",
}

@Article{Fu:2004:EHA,
  author =       "Lixin Fu and Sanguthevar Rajasekaran",
  title =        "Evaluating holistic aggregators efficiently for very
                 large datasets",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "2",
  pages =        "148--161",
  month =        may,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0112-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:10 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In data warehousing applications, numerous OLAP
                 queries involve the processing of holistic aggregators
                 such as computing the ``top $n$,'' median, quantiles,
                 etc. In this paper, we present a novel approach called
                 dynamic bucketing to efficiently evaluate these
                 aggregators. We partition data into equiwidth buckets
                 and further partition dense buckets into subbuckets as
                 needed by allocating and reclaiming memory space. The
                 bucketing process dynamically adapts to the input order
                 and distribution of input datasets. The histograms of
                 the buckets and subbuckets are stored in our new data
                 structure called structure trees. A recent selection
                 algorithm based on regular sampling is generalized and
                 its analysis extended. We have also compared our new
                 algorithms with this generalized algorithm and several
                 other recent algorithms. Experimental results show that
                 our new algorithms significantly outperform prior ones
                 not only in the runtime but also in accuracy.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "aggregation; dynamic bucketing; quantiles",
}
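
%%% The equiwidth-bucketing idea behind the abstract above can be
%%% conveyed in a few lines.  The paper refines dense buckets into
%%% subbuckets within a single pass; this Python sketch instead takes a
%%% cheap second pass over the one target bucket, so it shows only the
%%% flavor of the approach (bucket count and names are ours):
%%%
%%%    def quantile_by_buckets(data, q, lo, hi, nbuckets=64):
%%%        width = (hi - lo) / nbuckets
%%%        counts = [0] * nbuckets
%%%        for x in data:                  # pass 1: equiwidth histogram
%%%            b = min(int((x - lo) / width), nbuckets - 1)
%%%            counts[b] += 1
%%%        rank, seen = int(q * (len(data) - 1)), 0
%%%        for b, c in enumerate(counts):  # find the bucket holding rank
%%%            if seen + c > rank:
%%%                members = sorted(
%%%                    x for x in data if
%%%                    min(int((x - lo) / width), nbuckets - 1) == b)
%%%                return members[rank - seen]  # resolve inside bucket
%%%            seen += c
%%%
%%%    if __name__ == "__main__":
%%%        print(quantile_by_buckets([17, 3, 99, 42, 8, 56, 23, 71],
%%%                                  0.5, 0, 100))
%%%        # -> 23, the lower median of the eight values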

@Article{Rahal:2004:ETU,
  author =       "Amira Rahal and Qiang Zhu and Per-{\AA}ke Larson",
  title =        "Evolutionary techniques for updating query cost models
                 in a dynamic multidatabase environment",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "2",
  pages =        "162--176",
  month =        may,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0110-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:10 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Deriving local cost models for query optimization in a
                 dynamic multidatabase system (MDBS) is a challenging
                 issue. In this paper, we study how to evolve a query
                 cost model to capture a slowly-changing dynamic MDBS
                 environment so that the cost model is kept up-to-date
                 all the time. Two novel evolutionary techniques, i.e.,
                 the shifting method and the block-moving method, are
                 proposed. The former updates a cost model by taking
                 up-to-date information from a new sample query into
                 consideration at each step, while the latter considers
                 a block (batch) of new sample queries at each step. The
                 relevant issues, including derivation of recurrence
                 updating formulas, development of efficient algorithms,
                 analysis and comparison of complexities, and design of
                 an integrated scheme to apply the two methods
                 adaptively, are studied. Our theoretical and
                 experimental results demonstrate that the proposed
                 techniques are quite promising in maintaining accurate
                 cost models efficiently for a slowly changing dynamic
                 MDBS environment. Besides the application to MDBSs, the
                 proposed techniques can also be applied to the
                 automatic maintenance of cost models in self-managing
                 database systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "cost model; evolutionary technique; multidatabase;
                 query optimization; self-managing database",
}
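
%%% The flavor of the ``shifting'' method above can be caricatured as a
%%% local linear cost model, cost = a + b * size, refit after every new
%%% sample query with older samples exponentially down-weighted.  The
%%% paper derives exact recurrence formulas; the decay factor and all
%%% names in this Python sketch are our assumptions:
%%%
%%%    class ShiftingCostModel:
%%%        def __init__(self, decay=0.9):
%%%            self.decay = decay
%%%            self.sw = self.sx = self.sy = 0.0  # decayed sufficient
%%%            self.sxx = self.sxy = 0.0          # statistics
%%%        def observe(self, size, cost):         # one new sample query
%%%            d = self.decay
%%%            self.sw  = d * self.sw  + 1.0
%%%            self.sx  = d * self.sx  + size
%%%            self.sy  = d * self.sy  + cost
%%%            self.sxx = d * self.sxx + size * size
%%%            self.sxy = d * self.sxy + size * cost
%%%        def predict(self, size):
%%%            den = self.sw * self.sxx - self.sx * self.sx
%%%            b = ((self.sw * self.sxy - self.sx * self.sy) / den
%%%                 if den else 0.0)
%%%            a = (self.sy - b * self.sx) / self.sw
%%%            return a + b * size
%%%
%%%    if __name__ == "__main__":
%%%        m = ShiftingCostModel()
%%%        for s, c in [(10, 1.2), (20, 2.1), (40, 4.3)]:
%%%            m.observe(s, c)
%%%        print(round(m.predict(30), 2))  # about 3.2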

@Article{Adi:2004:ASM,
  author =       "Asaf Adi and Opher Etzion",
  title =        "{Amit} --- the situation manager",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "2",
  pages =        "177--203",
  month =        may,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-003-0108-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:10 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper presents the ``situation manager'', a tool
                 that includes both a language and an efficient runtime
                 execution mechanism aimed at reducing the complexity of
                 active applications. This tool follows the observation
                 that in many cases there is a gap between current tools
                 that enable one to react to a single event (following
                 the ECA: event-condition-action paradigm) and the
                 reality in which a single event may not require any
                 reaction; however, the reaction should be given to
                  patterns over the event history. The concept of
                  ``situation'' presented in this paper extends the
                  concept of ``composite event'' in its expressive power,
                  flexibility, and usability. This
                 paper motivates the work, surveys other efforts in this
                 area, and discusses both the language and the execution
                 model.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "active databases; active technology; composite events;
                 high-level languages",
}

@Article{Freytag:2004:BPV,
  author =       "Johann-Christoph Freytag and Serge Abiteboul and Mike
                 Carey",
  title =        "Best papers of {VLDB} 2003",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "3",
  pages =        "205--206",
  month =        sep,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-004-0129-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:11 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ilyas:2004:STJ,
  author =       "Ihab F. Ilyas and Walid G. Aref and Ahmed K.
                 Elmagarmid",
  title =        "Supporting top-$k$ join queries in relational
                 databases",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "3",
  pages =        "207--221",
  month =        sep,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-004-0128-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:11 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Ranking queries, also known as top-$k$ queries,
                 produce results that are ordered on some computed
                 score. Typically, these queries involve joins, where
                 users are usually interested only in the top-$k$ join
                 results. Top-$k$ queries are dominant in many emerging
                 applications, e.g., multimedia retrieval by content,
                 Web databases, data mining, middlewares, and most
                 information retrieval applications. Current relational
                 query processors do not handle ranking queries
                 efficiently, especially when joins are involved. In
                 this paper, we address supporting top-$k$ join queries
                 in relational query processors. We introduce a new
                 rank-join algorithm that makes use of the individual
                 orders of its inputs to produce join results ordered on
                 a user-specified scoring function. The idea is to rank
                 the join results progressively during the join
                 operation. We introduce two physical query operators
                 based on variants of ripple join that implement the
                 rank-join algorithm. The operators are nonblocking and
                 can be integrated into pipelined execution plans. We
                 also propose an efficient heuristic designed to
                 optimize a top-$k$ join query by choosing the best join
                 order. We address several practical issues and
                 optimization heuristics to integrate the new join
                 operators in practical query processors. We implement
                 the new operators inside a prototype database engine
                 based on PREDATOR. The experimental evaluation of our
                 approach compares recent algorithms for joining ranked
                 inputs and shows superior performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "query operators; rank aggregarion; ranking; top-$k$
                 queries",
}
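
%%% The progressive rank-join idea above is easy to sketch: both inputs
%%% arrive ordered by score, seen tuples are buffered, and a join result
%%% is emitted once its combined score reaches the bound on every unseen
%%% combination.  Summed scores and at most one tuple per key per input
%%% are our simplifications, and the names are ours:
%%%
%%%    import heapq
%%%
%%%    def rank_join(left, right, k):
%%%        """left/right: (join_key, score) lists, sorted by score
%%%        descending; returns the top-k join results."""
%%%        seen_l, seen_r, buf, out = {}, {}, [], []
%%%        top_l, top_r = left[0][1], right[0][1]
%%%        last_l, last_r = top_l, top_r
%%%        i = j = 0
%%%        while len(out) < k and (i < len(left) or j < len(right)):
%%%            if i < len(left):              # pull one tuple from left
%%%                key, s = left[i]; i += 1
%%%                seen_l[key] = s; last_l = s
%%%                if key in seen_r:
%%%                    heapq.heappush(buf, (-(s + seen_r[key]), key))
%%%            if j < len(right):             # pull one tuple from right
%%%                key, s = right[j]; j += 1
%%%                seen_r[key] = s; last_r = s
%%%                if key in seen_l:
%%%                    heapq.heappush(buf, (-(seen_l[key] + s), key))
%%%            # no unseen pair can score above this threshold
%%%            threshold = max(last_l + top_r, top_l + last_r)
%%%            while buf and -buf[0][0] >= threshold and len(out) < k:
%%%                neg, key = heapq.heappop(buf)
%%%                out.append((key, -neg))
%%%        while buf and len(out) < k:        # inputs exhausted: drain
%%%            neg, key = heapq.heappop(buf)
%%%            out.append((key, -neg))
%%%        return out
%%%
%%%    if __name__ == "__main__":
%%%        L = [("a", 0.9), ("b", 0.8), ("c", 0.1)]
%%%        R = [("c", 0.95), ("a", 0.7), ("b", 0.2)]
%%%        print(rank_join(L, R, 2))  # top-2 results by summed score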

@Article{Papadimitriou:2004:AUS,
  author =       "Spiros Papadimitriou and Anthony Brockwell and
                 Christos Faloutsos",
  title =        "Adaptive, unsupervised stream mining",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "3",
  pages =        "222--239",
  month =        sep,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-004-0130-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:11 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Sensor devices and embedded processors are becoming
                 widespread, especially in measurement/monitoring
                 applications. Their limited resources (CPU, memory
                 and/or communication bandwidth, and power) pose some
                 interesting challenges. We need concise, expressive
                 models to represent the important features of the data
                 and that lend themselves to efficient estimation. In
                 particular, under these severe constraints, we want
                 models and estimation methods that (a) require little
                 memory and a single pass over the data, (b) can adapt
                 and handle arbitrary periodic components, and (c) can
                 deal with various types of noise. We propose
                 ${\mathrm{AWSOM}}$ (Arbitrary Window Stream mOdeling
                 Method), which allows sensors in remote or hostile
                 environments to efficiently and effectively discover
                 interesting patterns and trends. This can be done
                 automatically, i.e., with no prior inspection of the
                 data or any user intervention and expert tuning before
                 or during data gathering. Our algorithms require
                 limited resources and can thus be incorporated into
                 sensors --- possibly alongside a distributed query
                 processing engine [10,6,27]. Updates are performed in
                 constant time with respect to stream size using
                 logarithmic space. Existing forecasting methods
                 (SARIMA, GARCH, etc.) and ``traditional'' Fourier and
                 wavelet analysis fall short on one or more of these
                 requirements. To the best of our knowledge,
                 ${\mathrm{AWSOM}}$ is the first framework that combines
                 all of the above characteristics. Experiments on real
                 and synthetic datasets demonstrate that
                 ${\mathrm{AWSOM}}$ discovers meaningful patterns over
                 long time periods. Thus, the patterns can also be used
                 to make long-range forecasts, which are notoriously
                 difficult to perform. In fact, ${\mathrm{AWSOM}}$
                 outperforms manually set up autoregressive models, both
                 in terms of long-term pattern detection and modeling
                 and by at least $10 \times$ in resource consumption.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Labrinidis:2004:ETB,
  author =       "Alexandros Labrinidis and Nick Roussopoulos",
  title =        "Exploring the tradeoff between performance and data
                 freshness in database-driven {Web} servers",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "3",
  pages =        "240--255",
  month =        sep,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-004-0131-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:11 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Personalization, advertising, and the sheer volume of
                 online data generate a staggering amount of dynamic Web
                 content. In addition to Web caching, view
                 materialization has been shown to accelerate the
                 generation of dynamic Web content. View materialization
                 is an attractive solution as it decouples the serving
                 of access requests from the handling of updates. In the
                 context of the Web, selecting which views to
                 materialize must be decided online and needs to
                 consider both performance and data freshness, which we
                 refer to as the online view selection problem. In this
                 paper, we define data freshness metrics, provide an
                 adaptive algorithm for the online view selection
                 problem that is based on user-specified data freshness
                 requirements, and present experimental results.
                 Furthermore, we examine alternative metrics for data
                 freshness and extend our proposed algorithm to handle
                 multiple users and alternative definitions of data
                 freshness.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{He:2004:AIW,
  author =       "Hai He and Weiyi Meng and Clement Yu and Zonghuan
                 Wu",
  title =        "Automatic integration of {Web} search interfaces with
                 {WISE}-Integrator",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "3",
  pages =        "256--273",
  month =        sep,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-004-0126-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:11 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "An increasing number of databases are becoming Web
                 accessible through form-based search interfaces, and
                 many of these sources are database-driven e-commerce
                 sites. It is a daunting task for users to access
                 numerous Web sites individually to get the desired
                 information. Hence, providing a unified access to
                 multiple e-commerce search engines selling similar
                 products is of great importance in allowing users to
                 search and compare products from multiple sites with
                 ease. One key task for providing such a capability is
                 to integrate the Web search interfaces of these
                 e-commerce search engines so that user queries can be
                 submitted against the integrated interface. Currently,
                 integrating such search interfaces is carried out
                 either manually or semiautomatically, which is
                 inefficient and difficult to maintain. In this paper,
                 we present WISE-Integrator --- a tool that performs
                 automatic integration of Web Interfaces of Search
                 Engines. WISE-Integrator explores a rich set of special
                 metainformation that exists in Web search interfaces
                 and uses the information to identify matching
                 attributes from different search interfaces for
                 integration. It also resolves domain differences of
                 matching attributes. In this paper, we also discuss how
                 to automatically extract information from search
                 interfaces that is needed by WISE-Integrator to perform
                 automatic interface integration. Our experimental
                 results, based on 143 real-world search interfaces in
                 four different domains, indicate that WISE-Integrator
                 can achieve high attribute matching accuracy and can
                 produce high-quality integrated search interfaces
                 without human interactions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "attribute matching; interface extraction; metasearch;
                 schema integration; Web search interface integration",
}

@Article{Velegrakis:2004:PMC,
  author =       "Yannis Velegrakis and Ren{\'e} J. Miller and Lucian
                 Popa",
  title =        "Preserving mapping consistency under schema changes",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "3",
  pages =        "274--293",
  month =        sep,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-004-0136-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:11 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In dynamic environments like the Web, data sources may
                 change not only their data but also their schemas,
                 their semantics, and their query capabilities. When a
                 mapping is left inconsistent by a schema change, it has
                 to be detected and updated. We present a novel
                 framework and a tool (ToMAS) for automatically adapting
                 (rewriting) mappings as schemas evolve. Our approach
                 considers not only local changes to a schema but also
                 changes that may affect and transform many components
                 of a schema. Our algorithm detects mappings affected by
                 structural or constraint changes and generates all the
                 rewritings that are consistent with the semantics of
                 the changed schemas. Our approach explicitly models
                 mapping choices made by a user and maintains these
                 choices, whenever possible, as the schemas and mappings
                 evolve. When there is more than one candidate
                 rewriting, the algorithm may rank them based on how
                 close they are to the semantics of the existing
                 mappings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Florescu:2004:BSX,
  author =       "Daniela Florescu and Chris Hillery and Donald Kossmann
                 and Paul Lucas and Fabio Riccardi and Till Westmann and
                  Michael J. Carey and Arvind Sundararajan",
  title =        "The {BEA} streaming {XQuery} processor",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "3",
  pages =        "294--315",
  month =        sep,
  year =         "2004",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-004-0137-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:11 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper describes the design, implementation, and
                 performance characteristics of a commercial XQuery
                 processing engine, the BEA streaming XQuery processor.
                 This XQuery engine was designed to provide high
                 performance for message-processing applications, i.e.,
                 for transforming XML data streams. The engine is a
                 central component of the 8.1 release of BEA's
                 WebLogic Integration (WLI) product. The BEA XQuery
                 engine is fully compliant with the August 2002 draft of
                 the W3C XML Query Language specification and we are
                 currently porting it to the latest version of the
                 XQuery language (July 2004). A goal of this paper is to
                 describe how a fully compliant yet efficient XQuery
                 engine has been built from a few relatively simple
                 components and well-understood technologies.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gehrke:2004:GES,
  author =       "Johannes Gehrke and M. Hellerstein",
  title =        "{Guest Editorial} to the special issue on data stream
                 processing",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "4",
  pages =        "317--317",
  month =        dec,
  year =         "2004",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:12 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2004:FHQ,
  author =       "Huai Yang and Li Lee and Wynne Hsu",
  title =        "Finding hot query patterns over an {XQuery} stream",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "4",
  pages =        "318--332",
  month =        dec,
  year =         "2004",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:12 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Caching query results is one efficient approach to
                 improving the performance of XML management systems.
                 This entails the discovery of frequent XML queries
                 issued by users. In this paper, we model user queries
                 as a stream of XML query pattern trees and mine the
                 frequent query patterns over the query stream. To
                 facilitate the one-pass mining process, we devise a
                 novel data structure called DTS to summarize the
                 pattern trees seen so far. By grouping the incoming
                 pattern trees into batches, we can dynamically mark the
                 active portion of the current batch in DTS and limit
                 the enumeration of candidate trees to only the
                 currently active pattern trees. We also design another
                 summary data structure called ECTree that provides for
                 the incremental computation of the frequent tree
                 patterns over the query stream. Based on the above two
                 constructs, we present two mining algorithms called
                 XQSMinerI and XQSMinerII. XQSMinerI is fast, but it
                 tends to overestimate, while XQSMinerII adopts a
                 filter-and-refine approach to minimize the amount of
                 overestimation. Experimental results show that the
                 proposed methods are both efficient and scalable and
                 require only small memory footprints.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "frequent pattern mining; pattern tree; stream mining;
                 tree mining; XML query pattern",
}

@Article{Babcock:2004:OSD,
  author =       "Brian Babcock and Shivnath Babu and Mayur Datar and
                 Rajeev Motwani and Dilys Thomas",
  title =        "Operator scheduling in data stream systems",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "4",
  pages =        "333--353",
  month =        dec,
  year =         "2004",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:12 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In many applications involving continuous data
                 streams, data arrival is bursty and data rate
                 fluctuates over time. Systems that seek to give rapid
                 or real-time query responses in such an environment
                 must be prepared to deal gracefully with bursts in data
                 arrival without compromising system performance. We
                 discuss one strategy for processing bursty streams ---
                 adaptive, load-aware scheduling of query operators to
                 minimize resource consumption during times of peak
                 load. We show that the choice of an operator scheduling
                 strategy can have significant impact on the runtime
                 system memory usage as well as output latency. Our aim
                 is to design a scheduling strategy that minimizes the
                 maximum runtime system memory while maintaining the
                 output latency within prespecified bounds. We first
                 present Chain scheduling, an operator scheduling
                 strategy for data stream systems that is near-optimal
                 in minimizing runtime memory usage for any collection
                 of single-stream queries involving selections,
                 projections, and foreign-key joins with stored
                 relations. Chain scheduling also performs well for
                 queries with sliding-window joins over multiple streams
                 and multiple queries of the above types. However,
                 during bursts in input streams, when there is a buildup
                 of unprocessed tuples, Chain scheduling may lead to
                 high output latency. We study the online problem of
                 minimizing maximum runtime memory, subject to a
                 constraint on maximum latency. We present preliminary
                 observations, negative results, and heuristics for this
                 problem. A thorough experimental evaluation is provided
                 where we demonstrate the potential benefits of Chain
                 scheduling and its different variants, compare it with
                 competing scheduling strategies, and validate our
                 analytical conclusions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data streams; latency; memory management; scheduling",
}
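
%%% The greedy core of Chain scheduling as described above can be
%%% pictured in a few lines: at each step, run the operator (with queued
%%% input) whose segment of the memory-progress chart is steepest, i.e.,
%%% frees memory fastest.  The paper constructs the exact lower envelope
%%% over operator paths; the per-operator ratio below is a
%%% simplification, and the names are ours:
%%%
%%%    def pick_operator(ops):
%%%        """ops: dicts with 'queue' (tuples waiting), 'sel' (output
%%%        tuples per input tuple), 'time' (cost per tuple)."""
%%%        runnable = [o for o in ops if o["queue"] > 0]
%%%        # steepest slope: memory freed per unit of processing time
%%%        return max(runnable,
%%%                   key=lambda o: (1.0 - o["sel"]) / o["time"],
%%%                   default=None)
%%%
%%%    if __name__ == "__main__":
%%%        ops = [
%%%            {"name": "filter",  "queue": 10, "sel": 0.1, "time": 1.0},
%%%            {"name": "project", "queue": 5,  "sel": 0.8, "time": 0.2},
%%%        ]
%%%        print(pick_operator(ops)["name"])  # project: 1.0 beats 0.9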

@Article{Ganguly:2004:TSE,
  author =       "Sumit Ganguly and Minos Garofalakis and Rajeev
                 Rastogi",
  title =        "Tracking set-expression cardinalities over continuous
                 update streams",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "4",
  pages =        "354--369",
  month =        dec,
  year =         "2004",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:12 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "There is growing interest in algorithms for processing
                 and querying continuous data streams (i.e., data seen
                 only once in a fixed order) with limited memory
                 resources. In its most general form, a data stream is
                 actually an update stream, i.e., comprising data-item
                 deletions as well as insertions. Such massive update
                 streams arise naturally in several application domains
                 (e.g., monitoring of large IP network installations or
                 processing of retail-chain transactions). Estimating
                 the cardinality of set expressions defined over several
                 (possibly distributed) update streams is perhaps one of
                 the most fundamental query classes of interest; as an
                 example, such a query may ask ``what is the number of
                 distinct IP source addresses seen in passing packets
                 from both router $R_1$ and $R_2$ but not router
                 $R_3$?''. Earlier work only addressed very restricted
                 forms of this problem, focusing solely on the special
                 case of insert-only streams and specific operators
                 (e.g., union). In this paper, we propose the first
                 space-efficient algorithmic solution for estimating the
                 cardinality of full-fledged set expressions over
                 general update streams. Our estimation algorithms are
                 probabilistic in nature and rely on a novel, hash-based
                  synopsis data structure, termed ``2-level hash
                 sketch''. We demonstrate how our 2-level hash sketch
                 synopses can be used to provide low-error,
                 high-confidence estimates for the cardinality of set
                 expressions (including operators such as set union,
                 intersection, and difference) over continuous update
                 streams, using only space that is significantly
                 sublinear in the sizes of the streaming input
                 (multi-)sets. Furthermore, our estimators never require
                 rescanning or resampling of past stream items,
                 regardless of the number of deletions in the stream. We
                 also present lower bounds for the problem,
                 demonstrating that the space usage of our estimation
                 algorithms is within small factors of the optimal.
                 Finally, we propose an optimized, time-efficient stream
                 synopsis (based on 2-level hash sketches) that provides
                 similar, strong accuracy-space guarantees while
                 requiring only guaranteed logarithmic maintenance time
                 per update, thus making our methods applicable for
                 truly rapid-rate data streams. Our results from an
                 empirical study of our synopsis and estimation
                 techniques verify the effectiveness of our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "approximate query processing; data streams; data
                 synopses; randomized algorithms; set expressions",
}
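
%%% The deletion resilience claimed above rests on signed counters: a
%%% bucket updated with +1 on insert and -1 on delete can still be
%%% recognized as holding a single distinct item.  Greatly simplified
%%% (first level only; hash choice and names are ours), the primitive
%%% looks like this in Python:
%%%
%%%    import hashlib
%%%
%%%    def lsb(x):
%%%        """Position of the least-significant set bit (FM style)."""
%%%        return (x & -x).bit_length() - 1
%%%
%%%    def h(item, seed=0):
%%%        digest = hashlib.sha256(f"{seed}:{item}".encode()).digest()
%%%        return int.from_bytes(digest[:8], "big") or 1
%%%
%%%    class Level1Sketch:
%%%        def __init__(self, levels=64):
%%%            self.count = [0] * levels  # inserts +1, deletes -1
%%%            self.sum = [0] * levels    # signed sum of hashed items
%%%        def update(self, item, delta):  # delta is +1 or -1
%%%            hv = h(item)
%%%            b = lsb(hv)
%%%            self.count[b] += delta
%%%            self.sum[b] += delta * hv
%%%        def singleton(self, b):
%%%            """True if bucket b likely holds one distinct item."""
%%%            if self.count[b] <= 0 or self.sum[b] % self.count[b]:
%%%                return False
%%%            return lsb(self.sum[b] // self.count[b]) == b
%%%
%%%    if __name__ == "__main__":
%%%        sk = Level1Sketch()
%%%        sk.update("10.0.0.1", +1); sk.update("10.0.0.2", +1)
%%%        sk.update("10.0.0.2", -1)   # the deletion cancels the insert
%%%        print(sk.singleton(lsb(h("10.0.0.1"))))  # True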

@Article{Balakrishnan:2004:RA,
  author =       "Hari Balakrishnan and Magdalena Balazinska and Don
                  Carney and U{\u{g}}ur {\c{C}}etintemel and Mitch
                 Cherniack and Christian Convey and Eddie Galvez and Jon
                 Salz and Michael Stonebraker and Nesime Tatbul and
                 Richard Tibbetts and Stan Zdonik",
  title =        "Retrospective on {Aurora}",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "4",
  pages =        "370--383",
  month =        dec,
  year =         "2004",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:12 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This experience paper summarizes the key lessons we
                 learned throughout the design and implementation of the
                 Aurora stream-processing engine. For the past 2 years,
                 we have built five stream-based applications using
                 Aurora. We first describe in detail these applications
                 and their implementation in Aurora. We then reflect on
                 the design of Aurora based on this experience. Finally,
                 we discuss our initial ideas on a follow-on project,
                 called Borealis, whose goal is to eliminate the
                 limitations of Aurora as well as to address new key
                 challenges and applications in the stream-processing
                 domain.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data stream management; distributed stream processing;
                 monitoring applications; quality-of-service;
                 stream-processing engines",
}

@Article{Sharaf:2004:BEE,
  author =       "A. Sharaf and Jonathan Beaver and Alexandros
                 Labrinidis and K. Chrysanthis",
  title =        "Balancing energy efficiency and quality of aggregate
                 data in sensor networks",
  journal =      j-VLDB-J,
  volume =       "13",
  number =       "4",
  pages =        "384--403",
  month =        dec,
  year =         "2004",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:12 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In-network aggregation has been proposed as one method
                 for reducing energy consumption in sensor networks. In
                 this paper, we explore two ideas related to further
                 reducing energy consumption in the context of
                 in-network aggregation. The first is by influencing the
                 construction of the routing trees for sensor networks
                 with the goal of reducing the size of transmitted data.
                 To this end, we propose a group-aware network
                 configuration method that ``clusters'' along the same
                 path sensor nodes that belong to the same group. The
                 second idea involves imposing a hierarchy of output
                 filters on the sensor network with the goal of both
                 reducing the size of transmitted data and minimizing
                 the number of transmitted messages. More specifically,
                 we propose a framework to use temporal coherency
                 tolerances in conjunction with in-network aggregation
                 to save energy at the sensor nodes while maintaining
                 specified quality of data. These tolerances are based
                 on user preferences or can be dictated by the network
                 in cases where the network cannot support the current
                 tolerance level. Our framework, called TiNA, works on
                 top of existing in-network aggregation schemes. We
                 evaluate experimentally our proposed schemes in the
                 context of existing in-network aggregation schemes. We
                 present experimental results measuring energy
                 consumption, response time, and quality of data for
                 Group-By queries. Overall, our schemes provide
                 significant energy savings with respect to
                 communication and a negligible drop in quality of
                 data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "in-network query processing; power-aware computing;
                 semantic routing; sensor networks",
}
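
%%% The temporal coherency tolerance described above amounts to a tiny
%%% filter at each sensor node: transmit a reading only when it deviates
%%% from the last *reported* value by more than the tolerance.  The
%%% relative-deviation test and all names in this Python sketch are our
%%% reading of the scheme, not TiNA's code:
%%%
%%%    class TinaNode:
%%%        def __init__(self, tct=0.1):
%%%            self.tct, self.last_sent = tct, None
%%%        def maybe_send(self, reading):
%%%            """Return the reading if it must be sent, else None."""
%%%            if (self.last_sent is None or self.last_sent == 0 or
%%%                    abs(reading - self.last_sent)
%%%                    / abs(self.last_sent) > self.tct):
%%%                self.last_sent = reading
%%%                return reading
%%%            return None          # suppressed: within tolerance
%%%
%%%    if __name__ == "__main__":
%%%        node = TinaNode(tct=0.10)
%%%        for r in [20.0, 20.5, 21.9, 22.1, 25.0]:
%%%            print(r, "->", node.maybe_send(r))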

@Article{Ozsu:2005:E,
  author =       "Tamer {\"O}zsu",
  title =        "Editorial",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "1",
  pages =        "1--1",
  month =        mar,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:14 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gao:2005:JOT,
  author =       "Dengfeng Gao and S. Jensen and T. Snodgrass and D.
                 Soo",
  title =        "Join operations in temporal databases",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "1",
  pages =        "2--29",
  month =        mar,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:14 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Joins are arguably the most important relational
                 operators. Poor implementations are tantamount to
                 computing the Cartesian product of the input relations.
                 In a temporal database, the problem is more acute for
                 two reasons. First, conventional techniques are
                 designed for the evaluation of joins with equality
                 predicates rather than the inequality predicates
                 prevalent in valid-time queries. Second, the presence
                 of temporally varying data dramatically increases the
                 size of a database. These factors indicate that
                 specialized techniques are needed to efficiently
                 evaluate temporal joins. We address this need for
                 efficient join evaluation in temporal databases. Our
                 purpose is twofold. We first survey all previously
                 proposed temporal join operators. While many temporal
                 join operators have been defined in previous work, this
                 work has been done largely in isolation from competing
                 proposals, with little, if any, comparison of the
                 various operators. We then address evaluation
                 algorithms, comparing the applicability of various
                 algorithms to the temporal join operators and
                 describing a performance study involving algorithms for
                 one important operator, the temporal equijoin. Our
                 focus, with respect to implementation, is on
                 non-index-based join algorithms. Such algorithms do not
                 rely on auxiliary access paths but may exploit sort
                 orderings to achieve efficiency.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "attribute skew; interval join; partition join;
                 sort-merge join; temporal Cartesian product; temporal
                 join; timestamp skew",
}
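
%%% The temporal equijoin evaluated in the survey above pairs tuples
%%% with equal keys and overlapping valid-time intervals, keeping the
%%% intersection.  A plain nested loop stands in for the paper's
%%% sort-merge and partition-based algorithms; half-open intervals and
%%% all names are our assumptions:
%%%
%%%    def temporal_equijoin(r, s):
%%%        """r, s: lists of (key, value, start, end) tuples."""
%%%        out = []
%%%        for k1, v1, b1, e1 in r:
%%%            for k2, v2, b2, e2 in s:
%%%                lo, hi = max(b1, b2), min(e1, e2)
%%%                if k1 == k2 and lo < hi:  # keys match, intervals meet
%%%                    out.append((k1, v1, v2, lo, hi))
%%%        return out
%%%
%%%    if __name__ == "__main__":
%%%        emp = [("e1", "Alice", 1, 10), ("e2", "Bob", 5, 15)]
%%%        sal = [("e1", 50, 4, 12), ("e2", 60, 1, 3)]
%%%        print(temporal_equijoin(emp, sal))
%%%        # [('e1', 'Alice', 50, 4, 10)]; Bob's intervals never overlap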

@Article{Balmin:2005:SQX,
  author =       "Andrey Balmin and Yannis Papakonstantinou",
  title =        "Storing and querying {XML} data using denormalized
                 relational databases",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "1",
  pages =        "30--49",
  month =        mar,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:14 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "XML database systems emerge as a result of the
                 acceptance of the XML data model. Recent works have
                 followed the promising approach of building XML
                 database management systems on underlying
                 RDBMS's. Achieving query processing performance
                 reduces to two questions: (i) How should the XML data
                 be decomposed into data that are stored in the RDBMS?
                 (ii) How should the XML query be translated into an
                 efficient plan that sends one or more SQL queries to
                 the underlying RDBMS and combines the data into the XML
                 result? We provide a formal framework for XML
                 Schema-driven decompositions, which encompasses the
                 decompositions proposed in prior work and extends them
                 with decompositions that employ denormalized tables and
                 binary-coded XML fragments. We provide corresponding
                 query processing algorithms that translate the XML
                 query conditions into conditions on the relational
                 tables and assemble the decomposed data into the XML
                 query result. Our key performance focus is the response
                 time for delivering the first results of a query. The
                 most effective of the described decompositions have
                 been implemented in XCacheDB, an XML DBMS built on top
                 of a commercial RDBMS, which serves as our experimental
                 basis. We present experiments and analysis that point
                 to a class of decompositions, called inlined
                 decompositions, that improve query performance for full
                 results and first results, without significant increase
                 in the size of the database.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gal:2005:FME,
  author =       "Avigdor Gal and Ateret Anaby-Tavor and Alberto
                 Trombetta and Danilo Montesi",
  title =        "A framework for modeling and evaluating automatic
                 semantic reconciliation",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "1",
  pages =        "50--67",
  month =        mar,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:14 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The introduction of the Semantic Web vision and the
                 shift toward machine understandable Web resources has
                 unearthed the importance of automatic semantic
                 reconciliation. Consequently, new tools for automating
                 the process were proposed. In this work we present a
                 formal model of semantic reconciliation and analyze in
                 a systematic manner the properties of the process
                 outcome, primarily the inherent uncertainty of the
                 matching process and how it reflects on the resulting
                 mappings. An important feature of this research is the
                 identification and analysis of factors that impact the
                 effectiveness of algorithms for automatic semantic
                 reconciliation, leading, it is hoped, to the design of
                 better algorithms by reducing the uncertainty of
                 existing algorithms. Against this background we
                 empirically study the aptitude of two algorithms to
                 correctly match concepts. This research is both timely
                 and practical in light of recent attempts to develop
                 and utilize methods for automatic semantic
                 reconciliation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "mapping; ontology versioning; semantic
                 interoperability",
}

@Article{Halevy:2005:SML,
  author =       "Y. Halevy and G. Ives and Dan Suciu and Igor
                 Tatarinov",
  title =        "Schema mediation for large-scale semantic data
                 sharing",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "1",
  pages =        "68--83",
  month =        mar,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:14 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Intuitively, data management and data integration
                 tools should be well suited for exchanging information
                 in a semantically meaningful way. Unfortunately, they
                 suffer from two significant problems: they typically
                 require a common and comprehensive schema design before
                 they can be used to store or share information, and
                 they are difficult to extend because schema evolution
                 is heavyweight and may break backward compatibility. As
                 a result, many large-scale data sharing tasks are more
                 easily facilitated by non-database-oriented tools that
                 have little support for semantics. The goal of the peer
                 data management system (PDMS) is to address this need:
                 we propose the use of a decentralized, easily
                 extensible data management architecture in which any
                 user can contribute new data, schema information, or
                 even mappings between other peers' schemas.
                 PDMSs represent a natural step beyond data integration
                 systems, replacing their single logical schema with an
                 interlinked collection of semantic mappings between
                 peers' individual schemas. This paper considers
                 the problem of schema mediation in a PDMS. Our first
                 contribution is a flexible language for mediating
                 between peer schemas that extends known data
                 integration formalisms to our more complex
                 architecture. We precisely characterize the complexity
                 of query answering for our language. Next, we describe
                 a reformulation algorithm for our language that
                 generalizes both global-as-view and local-as-view query
                 answering algorithms. Then we describe several methods
                 for optimizing the reformulation algorithm and an
                 initial set of experiments studying its performance.
                 Finally, we define and consider several {\em global\/}
                 problems in managing semantic mappings in a PDMS.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data integration; peer data management; schema
                 mediation; Web and databases",
}

@Article{Benatallah:2005:AWS,
  author =       "Boualem Benatallah and Mohand-Said Hacid and Alain
                 Leger and Christophe Rey and Farouk Toumani",
  title =        "On automating {Web} services discovery",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "1",
  pages =        "84--96",
  month =        mar,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:14 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "One of the challenging problems that Web service
                 technology faces is the ability to effectively discover
                 services based on their capabilities. We present an
                 approach to tackling this problem in the context of
                 description logics (DLs). We formalize service
                 discovery as a new instance of the problem of rewriting
                 concepts using terminologies. We call this new instance
                 the {\em best covering problem}. We provide a
                 formalization of the {\em best covering problem\/} in
                 the framework of DL-based ontologies and propose a
                 hypergraph-based algorithm to effectively compute best
                 covers of a given request. We propose a novel
                 matchmaking algorithm that takes as input a service
                 request (or query) $Q$ and an ontology $\mathcal{T}$ of
                 services and finds a set of services called a ``best
                 cover'' of $Q$ whose descriptions contain as much {\em
                 common information\/} with $Q$ as possible and as
                 little {\em extra information\/} with respect to $Q$ as
                 possible. We have implemented the proposed discovery
                 technique and used the developed prototype in the
                 context of the {\em Multilingual Knowledge Based
                 European Electronic Marketplace\/} (MKBEEM) project.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "description logics; discovery; hypergraphs; semantic
                 matchmaking; Web services",
}
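
%%% The ``best cover'' objective stated above (as much common
%%% information with the request $Q$ as possible, as little extra as
%%% possible) invites a set-based caricature.  The paper works over
%%% description-logic ontologies and hypergraphs; the greedy keyword-set
%%% version below is only our illustration:
%%%
%%%    def best_cover(query, services):
%%%        """query: set of request terms; services: name -> term set."""
%%%        cover, covered = [], set()
%%%        while covered < query:
%%%            gain = lambda kv: len(kv[1] & (query - covered))
%%%            extra = lambda kv: len(kv[1] - query)
%%%            name, terms = max(services.items(),
%%%                              key=lambda kv: (gain(kv), -extra(kv)))
%%%            if gain((name, terms)) == 0:
%%%                break             # the rest of Q cannot be covered
%%%            cover.append(name)
%%%            covered |= terms & query
%%%        return cover
%%%
%%%    if __name__ == "__main__":
%%%        q = {"flight", "hotel", "visa"}
%%%        svcs = {"travelco": {"flight", "hotel", "car"},
%%%                "embassy": {"visa"}, "airline": {"flight"}}
%%%        print(best_cover(q, svcs))  # ['travelco', 'embassy']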

@Article{Sattler:2005:CBQ,
  author =       "Kai-Uwe Sattler and Ingolf Geist and Eike Schallehn",
  title =        "Concept-based querying in mediator systems",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "1",
  pages =        "97--111",
  month =        mar,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:14 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "One approach to overcoming heterogeneity as a part of
                 data integration in mediator systems is the use of
                 metadata in the form of a vocabulary or ontology to
                 represent domain knowledge explicitly. This requires
                 including this meta level during query formulation and
                 processing. In this paper, we address this problem in
                 the context of a mediator that uses a concept-based
                 integration model and an extension of the XQuery
                 language called CQuery. This mediator has been
                 developed as part of a project for integrating data
                 about cultural assets. We describe the language
                 extensions and their semantics as well as the rewriting
                 and evaluation steps. Furthermore, we discuss aspects
                 of caching and keyword-based search in support of an
                 efficient query formulation and processing.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data integration; mediator systems; query processing",
}

@Article{Tzitzikas:2005:MTB,
  author =       "Yannis Tzitzikas and Nicolas Spyratos and Panos
                 Constantopoulos",
  title =        "Mediators over taxonomy-based information sources",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "1",
  pages =        "112--136",
  month =        mar,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:14 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We propose a mediator model for providing integrated
                 and unified access to multiple taxonomy-based sources.
                 Each source comprises a taxonomy and a database that
                 indexes objects under the terms of the taxonomy. A
                 mediator comprises a taxonomy and a set of relations
                 between the mediator's and the sources'
                 terms, called articulations. By combining different
                 modes of query evaluation at the sources and the
                 mediator and different types of query translation, a
                 flexible, efficient scheme of mediator operation is
                 obtained that can accommodate various application needs
                 and levels of answer quality. We adopt a simple
                 conceptual modeling approach (taxonomies and
                 intertaxonomy mappings) and we illustrate its
                 advantages in terms of ease of use, uniformity,
                 scalability, and efficiency. These characteristics make
                 this proposal appropriate for a large-scale network of
                 sources and mediators.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "approximate query translation; information
                 integration; mediators; taxonomies",
}

@Article{Gunopulos:2005:SEM,
  author =       "Dimitrios Gunopulos and George Kollios and J. Tsotras
                 and Carlotta Domeniconi",
  title =        "Selectivity estimators for multidimensional range
                 queries over real attributes",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "2",
  pages =        "137--154",
  month =        apr,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:15 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Estimating the selectivity of multidimensional range
                 queries over real valued attributes has significant
                 applications in data exploration and database query
                 optimization. In this paper, we consider the following
                 problem: given a table of $d$ attributes whose domain
                 is the real numbers and a query that specifies a range
                 in each dimension, find a good approximation of the
                 number of records in the table that satisfy the query.
                 The simplest approach to tackle this problem is to
                 assume that the attributes are independent. More
                 accurate estimators try to capture the joint data
                 distribution of the attributes. In databases, such
                 estimators include the construction of multidimensional
                 histograms, random sampling, or the wavelet transform.
                 In statistics, kernel estimation techniques are being
                 used. Many traditional approaches assume that attribute
                 values come from discrete, finite domains, where
                 different values have high frequencies. However, for
                 many novel applications (as in temporal, spatial, and
                 multimedia databases) attribute values come from the
                 infinite domain of real numbers. Consequently, each
                 value appears very infrequently, a characteristic that
                 affects the behavior and effectiveness of the
                 estimator. Moreover, real-life data exhibit attribute
                 correlations that also affect the estimator. We present
                 a new histogram technique that is designed to
                 approximate the density of multidimensional datasets
                 with real attributes. Our technique defines buckets of
                 variable size and allows the buckets to overlap. The
                 size of the cells is based on the local density of the
                 data. The use of overlapping buckets allows a more
                 compact approximation of the data distribution. We also
                 show how to generalize kernel density estimators and
                 how to apply them to the multidimensional query
                 approximation problem. Finally, we compare the accuracy
                 of the proposed techniques with existing techniques
                 using real and synthetic datasets. The experimental
                 results show that the proposed techniques behave more
                 accurately in high dimensionalities than previous
                 approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Alhajj:2005:VFC,
  author =       "Reda Alhajj and Faruk Polat and Cem Y{\'\i}lmaz",
  title =        "Views as first-class citizens in object-oriented
                 databases",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "2",
  pages =        "155--169",
  month =        apr,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:15 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Extensibility and dynamic schema evolution are among
                 the attractive features that lead to the wide
                 acceptance of the object-oriented paradigm. Not knowing
                 all class hierarchy details should not prevent a user
                 from introducing new classes when necessary. Naive or
                 professional users may define new classes either by
                 using class definition constructs or as views. However,
                 improper placement of such classes leads to a flat
                 hierarchy with many things duplicated. To overcome this
                 problem, we automated the process in order to help the
                 user find the most appropriate position with respect to
                 her class in the hierarchy regardless of her knowledge
                 of the hierarchy. The system must be responsible for
                 the proper placement of new classes because only the
                 system has complete knowledge of the details of the
                 class hierarchy, especially in a dynamic environment
                 where changes are very frequent. In other published
                 work, we proved that to define a view it is enough to
                 have the set of objects that qualify to be in a view in
                 addition to having message expressions (possible paths)
                 that lead to desired values within those objects. Here,
                 we go further to map a view that is intended to be
                 persistent into a class. Then we investigate the proper
                 position of that class in the hierarchy. To achieve
                 this, we consider current characteristics of a new
                 class in order to derive its relationship with other
                 existing classes in the hierarchy. Another advantage of
                 the presented model is that views that generate new
                 objects are still updatable simply because we based the
                 creation of new objects on existing identities. In
                 other words, an object participates inside view objects
                 by its identity regardless of which particular values
                 from that object are of interest to the view. Values
                 are reachable via message expressions, not violating
                 encapsulation. This way, actual values are present in
                 only one place and can be updated.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "class hierarchy; object-oriented databases;
                 reusability; schema evolution; views",
}

@Article{Zhang:2005:OSM,
  author =       "Donghui Zhang and J. Tsotras",
  title =        "Optimizing spatial {Min\slash Max} aggregations",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "2",
  pages =        "170--181",
  month =        apr,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:15 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Aggregate computation over a collection of spatial
                 objects appears in many real-life applications.
                 Aggregates are computed on values (weights) associated
                 with spatial objects, for example, the temperature or
                 rainfall over the area covered by the object. In this
                 paper we concentrate on MIN/MAX aggregations: ``given a
                 query rectangle, find the minimum/maximum weight among
                 all objects intersecting the query rectangle.''
                 Traditionally such queries have been performed as range
                 searches. Assuming that objects are indexed by a
                 spatial access method (SAM), the MIN/MAX is computed
                 while retrieving those objects intersecting the query
                 interval. This requires effort proportional to the
                 number of objects satisfying the query, which may be
                 large. A better approach is to maintain aggregate
                 information among the index nodes of the spatial access
                 method; then various index paths can be eliminated
                 during the range search. Yet another approach is to
                 build a specialized index that maintains the aggregate
                 incrementally. We propose four novel optimizations for
                 improving the performance of MIN/MAX queries when an
                 index structure (traditional or specialized) is
                 present. Moreover, we introduce the MR-tree, an
                 R-tree-like dynamic specialized index that incorporates
                 all four optimizations. Our experiments show that the
                 MR-tree offers drastic performance improvement over
                 previous solutions. As a byproduct of this work we
                 present an optimized version of the MSB-tree, an index
                 that has been proposed for the MIN/MAX computation over
                 1D interval objects.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "indexing; MIN/MAX; spatial aggregates",
}

@Article{Perich:2005:CJP,
  author =       "Filip Perich and Anupam Joshi and Yelena Yesha and Tim
                 Finin",
  title =        "Collaborative joins in a pervasive computing
                 environment",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "2",
  pages =        "182--196",
  month =        apr,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:15 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We present a collaborative query processing protocol
                 based on the principles of Contract Nets. The protocol
                 is designed for pervasive computing environments where,
                 in addition to operating on limited computing and
                 battery resources, mobile devices cannot always rely on
                 being able to access the wired infrastructure. Devices,
                 therefore, need to collaborate with each other in order
                 to obtain data otherwise inaccessible due to the nature
                 of the environment. Furthermore, by intelligently using
                 answers cached by peers, devices can reduce their
                 computation cost. We show the effectiveness of our
                 approach by evaluating performance of devices querying
                 for data while moving in a citylike environment.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "distributed join processing; mobile ad hoc networks;
                 peer-to-peer computing; pervasive computing
                 environments; query processing",
}

@Article{Josifovski:2005:QXS,
  author =       "Vanja Josifovski and Marcus Fontoura and Attila
                 Barta",
  title =        "Querying {XML} streams",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "2",
  pages =        "197--210",
  month =        apr,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:15 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Efficient querying of XML streams will be one of the
                 fundamental features of next-generation information
                 systems. In this paper we propose the TurboXPath path
                 processor, which accepts a language equivalent to a
                 subset of the for-let-where constructs of XQuery over a
                 single document. TurboXPath can be extended to provide
                 full XQuery support or used to augment federated
                 database engines for efficient handling of queries over
                 XML data streams produced by external sources.
                 Internally, TurboXPath uses a tree-shaped path
                 expression with multiple outputs to drive the
                 execution. The result of a query execution is a
                 sequence of tuples of XML fragments matching the output
                 nodes. Based on a streamed execution model, TurboXPath
                 scales up to large documents and has limited memory
                 consumption for increased concurrency. Experimental
                 evaluation of a prototype demonstrates performance
                 gains compared to other state-of-the-art path
                 processors.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Aggarwal:2005:EEA,
  author =       "C. Aggarwal and S. Yu",
  title =        "An effective and efficient algorithm for
                 high-dimensional outlier detection",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "2",
  pages =        "211--221",
  month =        apr,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:15 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The outlier detection problem has important
                 applications in the field of fraud detection, network
                 robustness analysis, and intrusion detection. Such
                 applications are most important for high-dimensional
                 domains in which the data can contain hundreds of
                 dimensions. Many recent algorithms have been proposed
                 for outlier detection that use several concepts of
                 proximity in order to find the outliers based on their
                 relationship to the other points in the data. However,
                 in high-dimensional space, the data are sparse and
                 concepts using the notion of proximity fail to retain
                 their effectiveness. In fact, the sparsity of
                 high-dimensional data can be understood in a different
                 way so as to imply that every point is an equally good
                 outlier from the perspective of distance-based
                 definitions. Consequently, for high-dimensional data,
                 the notion of finding meaningful outliers becomes
                 substantially more complex and nonobvious. In this
                 paper, we discuss new techniques for outlier detection
                 that find the outliers by studying the behavior of
                 projections from the data set.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data mining; high-dimensional spaces; outlier
                 detection",
}

@Article{Yao:2005:HBL,
  author =       "D. Yao and Cyrus Shahabi and Per-{\AA}ke Larson",
  title =        "Hash-based labeling techniques for storage scaling",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "2",
  pages =        "222--237",
  month =        apr,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:15 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Scalable storage architectures allow for the addition
                 or removal of storage devices to increase storage
                 capacity and bandwidth or retire older devices.
                 Assuming random placement of data objects across
                 multiple storage devices of a storage pool, our
                 optimization objective is to redistribute a minimum
                 number of objects after scaling the pool. In addition,
                 a uniform distribution, and hence a balanced load,
                 should be ensured after redistribution. Moreover, the
                 redistributed objects should be retrieved efficiently
                 during the normal mode of operation: in one I/O access
                 and with low complexity computation. To achieve this,
                 we propose an algorithm called random disk labeling
                 (RDL), based on double hashing, where storage can be
                 added or removed without any increase in complexity. We
                 compare RDL with other proposed techniques and
                 demonstrate its effectiveness through
                 experimentation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "load balancing; random data placement; scalable
                 storage systems",
}

@Article{Kollios:2005:IMO,
  author =       "George Kollios and Dimitris Papadopoulos and Dimitrios
                 Gunopulos and J. Tsotras",
  title =        "Indexing mobile objects using dual transformations",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "2",
  pages =        "238--256",
  month =        apr,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:15 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the recent advances in wireless networks,
                 embedded systems, and GPS technology, databases that
                 manage the location of moving objects have received
                 increased interest. In this paper, we present indexing
                 techniques for moving object databases. In particular,
                 we propose methods to index moving objects in order to
                 efficiently answer range queries about their current
                 and future positions. This problem appears in real-life
                 applications such as predicting future congestion areas
                 in a highway system or allocating more bandwidth for
                 areas where a high concentration of mobile phones is
                 imminent. We address the problem in external memory and
                 present dynamic solutions, both for the one-dimensional
                 and the two-dimensional cases. Our approach transforms
                 the problem into a dual space that is easier to index.
                 Important in this dynamic environment is not only query
                 performance but also the update processing, given the
                 large number of moving objects that issue updates. We
                 compare the dual-transformation approach with the
                 TPR-tree, an efficient method for indexing moving
                 objects that is based on time-parameterized index
                 nodes. An experimental evaluation shows that the
                 dual-transformation approach provides comparable query
                 performance but has much faster update processing.
                 Moreover, the dual method does not require establishing
                 a predefined query horizon.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access methods; mobile objects; spatiotemporal
                 databases",
}

@Article{Jaluta:2005:CCR,
  author =       "Ibrahim Jaluta and Seppo Sippu and Eljas
                 Soisalon-Soininen",
  title =        "Concurrency control and recovery for balanced {B}-link
                 trees",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "2",
  pages =        "257--277",
  month =        apr,
  year =         "2005",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:15 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper we present new concurrent and
                 recoverable B-link-tree algorithms. Unlike previous
                 algorithms, ours maintain the balance of the B-link
                 tree at all times, so that a logarithmic time bound for
                 a search or an update operation is guaranteed under
                 arbitrary sequences of record insertions and deletions.
                 A database transaction can contain any number of
                 operations of the form ``fetch the first (or next)
                 matching record'', ``insert a record'', or ``delete a
                 record'', where database records are identified by
                 their primary keys. Repeatable-read-level isolation for
                 transactions is guaranteed by key-range locking. The
                 algorithms apply the write-ahead logging (WAL) protocol
                 and the steal and no-force buffering policies for index
                 and data pages. Record inserts and deletes on leaf
                 pages of a B-link tree are logged using physiological
                 redo-undo log records. Each structure modification such
                 as a page split or merge is made an atomic action by
                 keeping the pages involved in the modification latched
                 for the (short) duration of the modification and the
                 logging of that modification; at most two B-link-tree
                 pages are kept $X$-latched at a time. Each structure
                 modification brings the B-link tree into a structurally
                 consistent and balanced state whenever the tree was
                 structurally consistent and balanced initially. Each
                 structure modification is logged using a single
                 physiological redo-only log record. Thus, a structure
                 modification will never be undone even if the
                 transaction that gave rise to it eventually aborts. In
                 restart recovery, the redo pass of our ARIES-based
                 recovery protocol will always produce a structurally
                 consistent and balanced B-link tree, on which the
                 database updates by backward-rolling transactions can
                 always be undone logically, when a physical
                 (page-oriented) undo is no longer possible.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "concurrency control; recovery; transaction;
                 tree-structure modifications",
}

@Article{Gaasterland:2005:SID,
  author =       "Terry Gaasterland and H. V. Jagadish and Louiqa
                 Raschid",
  title =        "Special issue on data management, analysis, and mining
                 for the life sciences",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "3",
  pages =        "279--280",
  month =        sep,
  year =         "2005",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-005-0165-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:16 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=279",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

%%% TO DO: [23-Aug-2006] ACM Portal lacks data for v14n4, v15n1, and v15n2
@Article{Tian:2005:PMC,
  author =       "Yuanyuan Tian and Sandeep Tata and Richard A. Hankins
                 and Jignesh M. Patel",
  title =        "Practical methods for constructing suffix trees",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "3",
  pages =        "281--299",
  month =        sep,
  year =         "2005",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-005-0154-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:16 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=281",
  abstract =     "Sequence datasets are ubiquitous in modern
                 life-science applications, and querying sequences is a
                 common and critical operation in many of these
                 applications. The suffix tree is a versatile data
                 structure that can be used to evaluate a wide variety
                 of queries on sequence datasets, including evaluating
                 exact and approximate string matches, and finding
                 repeat patterns. However, methods for constructing
                 suffix trees are often very time-consuming, especially
                 for suffix trees that are large and do not fit in the
                 available main memory. Even when the suffix tree fits
                 in memory, it turns out that the processor cache
                 behavior of theoretically optimal suffix tree
                 construction methods is poor, resulting in poor
                 performance. Currently, there are a large number of
                 algorithms for constructing suffix trees, but the
                 practical tradeoffs in using these algorithms for
                 different scenarios are not well characterized. In this
                 paper, we explore suffix tree construction algorithms
                 over a wide spectrum of data sources and sizes. First,
                 we show that on modern processors, a cache-efficient
                 algorithm with $O(n^2)$ worst-case complexity
                 outperforms popular linear time algorithms like Ukkonen
                 and McCreight, even for in-memory construction. For
                 larger datasets, the disk I/O requirement quickly
                 becomes the bottleneck in each algorithm's performance.
                 To address this problem, we describe two approaches.
                 First, we present a buffer management strategy for the
                 $O(n^2)$ algorithm. The resulting new algorithm, which
                 we call ``Top Down Disk-based'' (TDD), scales to sizes
                 much larger than have been previously described in
                 the literature. This approach far outperforms the best
                 known disk-based construction methods. Second, we
                 present a new disk-based suffix tree construction
                 algorithm that is based on a sort-merge paradigm, and
                 show that for constructing very large suffix trees with
                 very little resources, this algorithm is more efficient
                 than TDD.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "sequence matching; suffix tree construction",
}

@Article{Claypool:2005:SYD,
  author =       "Kajal T. Claypool and Elke A. Rundensteiner",
  title =        "Sync your data: update propagation for heterogeneous
                 protein databases",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "3",
  pages =        "300--317",
  month =        sep,
  year =         "2005",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-005-0155-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:16 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=300",
  abstract =     "The traditional model of bench (wet) chemistry in many
                 life sciences domains is today actively complemented by
                 computer-based discoveries utilizing the growing number
                 of online data sources. A typical {\em computer-based
                 discovery\/} scenario for many life scientists includes
                 the creation of local caches of pertinent information
                 from multiple online resources such as Swissprot
                 [Nucleic Acid Res. 1 (28), 45--48 (2000)], PIR [Nucleic
                 Acids Res. 28 (1), 41--44 (2000)], PDB [The Protein
                 DataBank. Wiley, New York (2003)], to enable efficient
                 data analysis. This local caching of data, however,
                 exposes their research and eventual results to the
                 problems of data staleness, that is, cached data may
                 quickly be obsolete or incorrect, dependent on the
                 updates that are made to the source data. This
                 represents a significant challenge to the scientific
                 community, forcing scientists to be continuously aware
                 of the frequent changes made to public data sources,
                 and more importantly aware of the potential effects on
                 their own derived data sets during the course of their
                 research. To address this significant challenge, in
                 this paper we present an approach for handling update
                 propagation between heterogeneous databases,
                 guaranteeing data freshness for scientists irrespective
                 of their choice of data source and its underlying data
                 model or interface. We propose a {\em
                 middle-layer\/}-based solution wherein first the
                 change in the online
                 data source is translated to a sequence of changes in
                 the middle-layer; next each change in the middle-layer
                 is propagated through an algebraic representation of
                 the translation between the source and the target; and
                 finally the net-change is translated to a set of
                 changes that are then applied to the local cache. In
                 this paper, we present our algebraic model that
                 represents the mapping of the online resource to the
                 local cache, as well as our adaptive propagation
                 algorithm that can incrementally propagate both schema
                 and data changes from the source to the cache in a data
                 model independent manner. We present a case study based
                 on a joint ongoing project with our collaborators in
                 the Chemistry Department at UMass-Lowell to explicate
                 our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data transformation; data translation; schema
                 evolution; update propagation; view maintenance",
}

@Article{Conery:2005:RBW,
  author =       "John S. Conery and Julian M. Catchen and Michael
                 Lynch",
  title =        "Rule-based workflow management for bioinformatics",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "3",
  pages =        "318--329",
  month =        sep,
  year =         "2005",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-005-0153-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:16 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=318",
  abstract =     "We describe a data-centric software architecture for
                 bioinformatics workflows and a rule-based workflow
                 enactment system that uses declarative specifications
                 of data dependences between steps to automatically
                 order the execution of those steps. A data-centric view
                 allows researchers to develop abstract descriptions of
                 workflow products and provides mechanisms for
                 describing workflow steps as objects. The rule-based
                 approach supports an iterative design methodology for
                 creating new workflows, where steps can be developed in
                 small, incremental updates, and the object orientation
                 allows workflow steps developed for one project to be
                 reused in other projects.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "bioinformatics; rule-based system; workflow",
}

@Article{Thakkar:2005:COE,
  author =       "Snehal Thakkar and Jos{\'e} Luis Ambite and Craig A.
                 Knoblock",
  title =        "Composing, optimizing, and executing plans for
                 bioinformatics web services",
  journal =      j-VLDB-J,
  volume =       "14",
  number =       "3",
  pages =        "330--353",
  month =        sep,
  year =         "2005",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-005-0158-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:16 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0938-1287&volume=14&issue=3;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0938-1287&volume=14&issue=3&spage=330",
  abstract =     "The emergence of a large number of bioinformatics
                 datasets on the Internet has resulted in the need for
                 flexible and efficient approaches to integrate
                 information from multiple bioinformatics data sources
                 and services. In this paper, we present our approach to
                 automatically generate composition plans for web
                 services, optimize the composition plans, and execute
                 these plans efficiently. While data integration
                 techniques have been applied to the bioinformatics
                 domain, the focus has been on answering specific user
                 queries. In contrast, we focus on automatically
                 generating {\em parameterized\/} integration plans that
                 can be hosted as web services that respond to a range
                 of inputs. In addition, we present two novel techniques
                 that improve the execution time of the generated plans
                 by reducing the number of requests to the existing data
                 sources and by executing the generated plan more
                 efficiently. The first optimization technique, called
                 tuple-level filtering, analyzes the source/service
                 descriptions in order to automatically insert filtering
                 conditions in the composition plans that result in
                 fewer requests to the component web services. To ensure
                 that the filtering conditions can be evaluated, this
                 technique may include sensing operations in the
                 integration plan. The savings due to filtering
                 significantly exceed the cost of the sensing
                 operations. The second optimization technique consists
                 in mapping the integration plans into programs that can
                 be executed by a dataflow-style, streaming execution
                 engine. We use real-world bioinformatics web services
                 to show experimentally that (1) our automatic
                 composition techniques can efficiently generate
                 parameterized plans that integrate data from large
                 numbers of existing services and (2) our optimization
                 techniques can significantly reduce the response time
                 of the generated integration plans.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "bioinformatics; data integration; dataflow-style
                 streaming execution; query optimization; Web service
                 composition",
}

@Article{Vlachos:2006:IMT,
  author =       "Michail Vlachos and Marios Hadjieleftheriou and
                 Dimitrios Gunopulos and Eamonn Keogh",
  title =        "Indexing {Multidimensional Time-Series}",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "1",
  pages =        "1--20",
  month =        jan,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "While most time series data mining research has
                 concentrated on providing solutions for a single
                 distance function, in this work we motivate the need
                 for an index structure that can support multiple
                 distance measures. Our specific area of interest is the
                 efficient retrieval and analysis of similar
                 trajectories. Trajectory datasets are very common in
                 environmental applications, mobility experiments, and
                 video surveillance and are especially important for the
                 discovery of certain biological patterns. Our primary
                 similarity measure is based on the longest common
                 subsequence (LCSS) model that offers enhanced
                 robustness, particularly for noisy data, which are
                 encountered very often in real-world applications.
                 However, our index is able to accommodate other
                 distance measures as well, including the ubiquitous
                 Euclidean distance and the increasingly popular dynamic
                 time warping (DTW). While other researchers have
                 advocated one or other of these similarity measures, a
                 major contribution of our work is the ability to
                 support all these measures without the need to
                 restructure the index. Our framework guarantees no
                 false dismissals and can also be tailored to provide
                 much faster response time at the expense of slightly
                 reduced precision/recall. The experimental results
                 demonstrate that our index can help speed up the
                 computation of expensive similarity measures such as
                 the LCSS and the DTW.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "dynamic time warping; ensemble index; longest common
                 subsequence; motion capture; trajectories",
}

@Article{Zheng:2006:GPI,
  author =       "Baihua Zheng and Jianliang Xu and Wang-Chien Lee and
                 Lun Lee",
  title =        "Grid-partition index: a hybrid method for
                 nearest-neighbor queries in wireless location-based
                 services",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "1",
  pages =        "21--39",
  month =        jan,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Traditional nearest-neighbor (NN) search is based on
                 two basic indexing approaches: object-based indexing
                 and solution-based indexing. The former is constructed
                 based on the locations of data objects: using some
                 distance heuristics on object locations. The latter is
                 built on a precomputed solution space. Thus, NN queries
                 can be reduced to and processed as simple point queries
                 in this solution space. Both approaches exhibit some
                 disadvantages, especially when employed for wireless
                 data broadcast in mobile computing environments. In
                 this paper, we introduce a new index method, called the
                 {\em grid-partition index}, to support NN search in
                 both on-demand access and periodic broadcast modes of
                 mobile computing. The grid-partition index is
                 constructed based on the Voronoi diagram, i.e., the
                 solution space of NN queries. However, it has two
                 distinctive characteristics. First, it divides the
                 solution space into grid cells such that a query point
                 can be efficiently mapped into a grid cell around which
                 the nearest object is located. This significantly
                 reduces the search space. Second, the grid-partition
                 index stores the {\em objects\/} that are potential NNs
                 of any query falling within the cell. The storage of
                 objects, instead of the Voronoi cells, makes the
                 grid-partition index a hybrid of the solution-based and
                 object-based approaches. As a result, it achieves a
                 much more compact representation than the pure
                 solution-based approach and avoids backtracked
                 traversals required in the typical object-based
                 approach, thus realizing the advantages of both
                 approaches. We develop an incremental construction
                 algorithm to address the issue of object update. In
                 addition, we present a cost model to approximate the
                 search cost of different grid partitioning schemes. The
                 performances of the grid-partition index and existing
                 indexes are evaluated using both synthetic and real
                 data. The results show that, overall, the
                 grid-partition index significantly outperforms
                 object-based indexes and solution-based indexes.
                 Furthermore, we extend the grid-partition index to
                 support continuous-nearest-neighbor search. Both
                 algorithms and experimental results are presented.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "continuous-nearest-neighbor search; index structure;
                 location-dependent data; nearest-neighbor search;
                 wireless broadcast",
}

@Article{Tamir:2006:CGM,
  author =       "Raz Tamir and Yehuda Singer",
  title =        "On a confidence gain measure for association rule
                 discovery and scoring",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "1",
  pages =        "40--52",
  month =        jan,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This article presents a new interestingness measure
                 for association rules called confidence gain (CG).
                 Focus is given to extraction of human associations
                 rather than associations between market products. There
                 are two main differences between the two (human and
                 market associations). The first difference is the
                 strong asymmetry of human associations (e.g., the
                 association ``shampoo''--``hair'' is much stronger than
                 ``hair''--``shampoo''), where in market products
                 asymmetry is less intuitive and less evident. The
                 second is the background knowledge humans employ when
                 presented with a stimulus (input phrase). CG calculates
                 the local confidence of a given term compared to its
                 average confidence throughout a given database. CG is
                 found to outperform several association measures since
                 it captures both the asymmetric notion of an
                 association (as in the confidence measure) while adding
                 the comparison to an expected confidence (as in the
                 lift measure). The use of average confidence introduces
                 the ``background knowledge'' notion into the CG
                 measure. Various experiments have shown that CG and
                 local confidence gain (a low-complexity version of CG)
                 successfully generate association rules when compared
                 to human free associations. The experiments include a
                 large-scale ``free association Turing test'' where
                 human free associations were compared to associations
                 generated by the CG and other association measures.
                 Rules discovered by CG were found to be significantly
                 better than those discovered by other measures. CG can
                 be used for many purposes, such as personalization,
                 sense disambiguation, query expansion, and improving
                 classification performance of small item sets within
                 large databases. Although CG was found to be useful for
                 Internet data retrieval, results can be easily used
                 over any type of database.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "association generation; association rule validation
                 methods; confidence gain; Web data management; Web
                 mining",
}

@Article{Bremer:2006:IDD,
  author =       "Jan-Marco Bremer and Michael Gertz",
  title =        "Integrating document and data retrieval based on
                 {XML}",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "1",
  pages =        "53--83",
  month =        jan,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "For querying structured and semistructured data, data
                 retrieval and document retrieval are two valuable and
                 complementary techniques that have not yet been fully
                 integrated. In this paper, we introduce integrated
                 information retrieval (IIR), an XML-based retrieval
                 approach that closes this gap. We introduce the syntax
                 and semantics of an extension of the XQuery language
                 called XQuery/IR. The extended language realizes IIR
                 and thereby allows users to formulate new kinds of
                 queries by nesting ranked document retrieval and
                 precise data retrieval queries. Furthermore, we detail
                 index structures and efficient query processing
                 approaches for implementing XQuery/IR. Based on a new
                 identification scheme for nodes in node-labeled tree
                 structures, the extended index structures require only
                 a fraction of the space of comparable index structures
                 that only support data retrieval.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data retrieval; document retrieval; index structures;
                 integrated information retrievals; structural join;
                 XML",
}

@Article{Ogras:2006:OSD,
  author =       "Y. Ogras and Hakan Ferhatosmanoglu",
  title =        "Online summarization of dynamic time series data",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "1",
  pages =        "84--98",
  month =        jan,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:17 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Managing large-scale time series databases has
                 attracted significant attention in the database
                 community recently. Related fundamental problems such
                 as dimensionality reduction, transformation, pattern
                 mining, and similarity search have been studied
                 extensively. Although the time series data are dynamic
                 by nature, as in data streams, current solutions to
                 these fundamental problems have been mostly for the
                 static time series databases. In this paper, we first
                 propose a framework for online summary generation for
                 large-scale and dynamic time series data, such as data
                 streams. Then, we propose online transform-based
                 summarization techniques over data streams that can be
                 updated in constant time and space. We present both the
                 exact and approximate versions of the proposed
                 techniques and provide error bounds for the approximate
                 case. One of our main contributions in this paper is
                 the extensive performance analysis. Our experiments
                 carefully evaluate the quality of the online summaries
                 for point, range, and $k$-NN queries using
                 real-life dynamic data sets of substantial size.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data streams; dimensionality reduction; time-series
                 data; transformation-based summarization",
}

@Article{Goh:2006:DBM,
  author =       "Leng Goh and Yanfeng Shu and Zhiyong Huang and Chin
                 Ooi",
  title =        "Dynamic buffer management with extensible replacement
                 policies",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "2",
  pages =        "99--120",
  month =        jun,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:18 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The objective of extensible DBMSs is to ease the
                 construction of specialized DBMSs for nontraditional
                 applications. Although much work has been done in
                 providing various levels of extensibility (e.g.,
                 extensibility of data types and operators, query
                 language extensibility, and query optimizer
                 extensibility), there has been very limited research in
                 providing extensibility at the buffer management level.
                 Supporting extensibility at the buffer management level
                 is important as it can contribute significantly to
                 overall system performance. This paper addresses the
                 problem of supporting extensibility of buffer
                 replacement policies. The main contribution is the
                 proposal of a framework for modeling buffer replacement
                 policies. This work is novel in two aspects. First, by
                 providing a uniform and generic specification of buffer
                 replacement policies, the proposed framework unifies
                 existing work in this area. Second, our work introduces
                 a new level of extensibility. None of the existing
                 extensible DBMSs, to our knowledge, provides
                 extensibility at the buffer management level. The
                 proposed framework provides a basis for the
                 construction of an extensible buffer manager as part of
                 a 100\% Java-based storage manager. We conducted an
                 extensive performance study to investigate the
                 performance of the proposed framework. The experimental
                 results demonstrate that the proposed framework is
                 indeed feasible for existing DBMSs and improves system
                 performance significantly while incurring only minimal
                 overhead.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "buffer management; extensible DBMS; replacement
                 strategies",
}
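
%%% The replacement-policy extensibility discussed above amounts to a
%%% buffer manager parameterized by a pluggable policy object. A minimal
%%% Python sketch, assuming a simple page-table design (names are
%%% hypothetical; the paper's framework is considerably more general):
%%%
%%%    from collections import OrderedDict
%%%
%%%    class LRUPolicy:
%%%        """One interchangeable replacement policy: least recently used."""
%%%        def __init__(self):
%%%            self.order = OrderedDict()
%%%        def touch(self, page_id):              # called on every access
%%%            self.order.pop(page_id, None)
%%%            self.order[page_id] = True
%%%        def evict(self):                       # pick a victim page
%%%            victim, _ = self.order.popitem(last=False)
%%%            return victim
%%%
%%%    class BufferManager:
%%%        def __init__(self, capacity, policy):
%%%            self.capacity, self.policy, self.frames = capacity, policy, {}
%%%        def get_page(self, page_id, read_from_disk):
%%%            if page_id not in self.frames:
%%%                if len(self.frames) >= self.capacity:
%%%                    del self.frames[self.policy.evict()]
%%%                self.frames[page_id] = read_from_disk(page_id)
%%%            self.policy.touch(page_id)
%%%            return self.frames[page_id]
%%%
%%% Swapping in a different policy object (MRU, clock, ...) changes the
%%% replacement behavior without touching the buffer manager itself.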

@Article{Arasu:2006:CCQ,
  author =       "Arvind Arasu and Shivnath Babu and Jennifer Widom",
  title =        "The {CQL} continuous query language: semantic
                 foundations and query execution",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "2",
  pages =        "121--142",
  month =        jun,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:18 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "{\em CQL}, a {\em continuous query language}, is
                 supported by the STREAM prototype data stream
                 management system (DSMS) at Stanford. CQL is an
                 expressive SQL-based declarative language for
                 registering continuous queries against streams and
                 stored relations. We begin by presenting an abstract
                 semantics that relies only on ``black-box'' mappings
                 among streams and relations. From these mappings we
                 define a precise and general interpretation for
                 continuous queries. CQL is an instantiation of our
                 abstract semantics using SQL to map from relations to
                 relations, window specifications derived from SQL-99 to
                 map from streams to relations, and three new operators
                 to map from relations to streams. Most of the CQL
                 language is operational in the STREAM system. We
                 present the structure of CQL's query execution plans as
                 well as details of the most important components:
                 operators, interoperator queues, synopses, and sharing
                 of components among multiple operators and queries.
                 Examples throughout the paper are drawn from the {\em
                 Linear Road\/} benchmark recently proposed for DSMSs.
                 We also curate a public repository of data stream
                 applications that includes a wide variety of queries
                 expressed in CQL. The relative ease of capturing these
                 applications in CQL is one indicator that the language
                 contains an appropriate set of constructs for data
                 stream processing.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "continuous queries; data streams; query language;
                 query processing",
}
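
%%% The abstract semantics above rests on mappings between streams and
%%% relations. A toy Python rendering of two of them -- a [Rows n]
%%% sliding window (stream-to-relation) and Istream (relation-to-stream)
%%% -- illustrates how they compose; this sketches the semantics only,
%%% not the STREAM implementation:
%%%
%%%    def rows_window(stream, n):
%%%        """[Rows n]: at each instant the relation is the last n tuples.
%%%        'stream' yields (timestamp, tuple) pairs in timestamp order."""
%%%        win = []
%%%        for t, tup in stream:
%%%            win = (win + [tup])[-n:]
%%%            yield t, set(win)
%%%
%%%    def istream(states):
%%%        """Istream: emit a tuple whenever it enters the relation state."""
%%%        prev = set()
%%%        for t, rel in states:
%%%            for tup in rel - prev:
%%%                yield t, tup
%%%            prev = rel
%%%
%%% Composing them, istream(rows_window(s, 5)) mimics a CQL query of the
%%% shape SELECT Istream(*) FROM S [Rows 5].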

@Article{Hadjieleftheriou:2006:ISA,
  author =       "Marios Hadjieleftheriou and George Kollios and J.
                 Tsotras and Dimitrios Gunopulos",
  title =        "Indexing spatiotemporal archives",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "2",
  pages =        "143--164",
  month =        jun,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:18 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Spatiotemporal objects --- that is, objects that
                 evolve over time --- appear in many applications. Due
                 to the nature of such applications, storing the
                 evolution of objects through time in order to answer
                 historical queries (queries that refer to past states
                 of the evolution) requires a very large specialized
                 database, what is termed in this article a {\em
                 spatiotemporal archive}. Efficient processing of
                 historical queries on spatiotemporal archives requires
                 equally sophisticated indexing schemes. Typical
                 spatiotemporal indexing techniques represent the
                 objects using minimum bounding regions (MBR) extended
                 with a temporal dimension, which are then indexed using
                 traditional multidimensional index structures. However,
                 rough MBR approximations introduce excessive overlap
                 between index nodes, which deteriorates query
                 performance. This article introduces a robust indexing
                 scheme for answering spatiotemporal queries more
                 efficiently. A number of algorithms and heuristics are
                 elaborated that can be used to preprocess a
                 spatiotemporal archive in order to produce {\em finer
                 object approximations}, which, in combination with {\em
                 a multiversion index structure}, will greatly improve
                 query performance in comparison to the straightforward
                 approaches. The proposed techniques introduce a query
                 efficiency vs. space tradeoff that can help tune a
                 structure according to available resources. Empirical
                 observations for estimating the necessary amount of
                 additional storage space required for improving query
                 performance by a given factor are also provided.
                 Moreover, heuristics for applying the proposed ideas in
                 an online setting are discussed. Finally, a thorough
                 experimental evaluation is conducted to show the merits
                 of the proposed techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "indexing; moving objects; spatiotemporal databases;
                 trajectories",
}
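
%%% The "finer object approximations" above replace one rough 3D MBR per
%%% trajectory with several tighter ones. A minimal Python sketch of the
%%% idea (the paper's algorithms choose split points far more carefully):
%%%
%%%    def split_mbrs(traj, pieces):
%%%        """traj: list of (t, x, y) samples. Return 'pieces' smaller 3D
%%%        MBRs covering consecutive sub-trajectories, trading extra index
%%%        entries for less dead space and overlap."""
%%%        step = max(1, len(traj) // pieces)
%%%        out = []
%%%        for s in range(0, len(traj) - 1, step):
%%%            seg = traj[s:s + step + 1]   # share endpoint with neighbor
%%%            ts, xs, ys = zip(*seg)
%%%            out.append(((min(ts), min(xs), min(ys)),
%%%                        (max(ts), max(xs), max(ys))))
%%%        return out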

@Article{Guting:2006:MQM,
  author =       "Hartmut G{\"u}ting and Teixeira de Almeida and Zhiming
                 Ding",
  title =        "Modeling and querying moving objects in networks",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "2",
  pages =        "165--190",
  month =        jun,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:18 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Moving objects databases have become an important
                 research issue in recent years. For modeling and
                 querying moving objects, there exists a comprehensive
                 framework of abstract data types to describe objects
                 moving freely in the 2D plane, providing data types
                 such as {\em moving point\/} or {\em moving region}.
                 However, in many applications people or vehicles move
                 along transportation networks. It makes a lot of sense
                 to model the network explicitly and to describe
                 movements relative to the network rather than
                 unconstrained space, because then it is much easier to
                 formulate in queries relationships between moving
                 objects and the network. Moreover, such models can be
                 better supported in indexing and query processing. In
                 this paper, we extend the ADT approach by modeling
                 networks explicitly and providing data types for static
                 and moving network positions and regions. In a highway
                 network, example entities corresponding to these data
                 types are motels, construction areas, cars, and traffic
                 jams. The network model is not too simplistic; it
                 allows one to distinguish simple roads and divided
                 highways and to describe the possible traversals of
                 junctions precisely. The new types and operations are
                 integrated seamlessly into the ADT framework to achieve
                 a relatively simple, consistent and powerful overall
                 model and query language for constrained and
                 unconstrained movement.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "ADT; data type; moving object; network;
                 spatio-temporal",
}
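
%%% The network-constrained data types above can be pictured as
%%% route-relative positions. A small Python sketch loosely modeled on
%%% the paper's static and moving network-position types (field names
%%% are illustrative, not the paper's signatures):
%%%
%%%    from typing import NamedTuple
%%%
%%%    class GPoint(NamedTuple):   # a static network position
%%%        route_id: int
%%%        pos: float              # distance from the start of the route
%%%
%%%    class UGPoint(NamedTuple):  # one linear unit of a moving position
%%%        route_id: int
%%%        t0: float
%%%        t1: float
%%%        p0: float
%%%        p1: float
%%%        def at(self, t):        # position at time t0 <= t <= t1
%%%            r = (t - self.t0) / (self.t1 - self.t0)
%%%            return GPoint(self.route_id,
%%%                          self.p0 + r * (self.p1 - self.p0))
%%%
%%% Describing movement by (route, position) rather than free (x, y)
%%% coordinates is what makes network-relative queries easy to state.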

@Article{Chirkova:2006:AQU,
  author =       "Rada Chirkova and Chen Li and Jia Li",
  title =        "Answering queries using materialized views with
                 minimum size",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "3",
  pages =        "191--210",
  month =        apr,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:19 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we study the following problem. Given a
                 database and a set of queries, we want to find a set of
                 views that can compute the answers to the queries, such
                 that the amount of space, in bytes, required to store
                 the viewset is minimum on the given database. (We also
                 handle problem instances where the input has a {\em
                 set\/} of database instances, as described by an oracle
                 that returns the sizes of view relations for given view
                 definitions.) This problem is important for
                 applications such as distributed databases, data
                 warehousing, and data integration. We explore the
                 decidability and complexity of the problem for
                 workloads of conjunctive queries. We show that results
                 differ significantly depending on whether the workload
                 queries have self-joins. Further, for queries without
                 self-joins we describe a very compact search space of
                 views, which contains all views in at least one optimal
                 viewset. We present techniques for finding a
                 minimum-size viewset for a single query without
                 self-joins by using the shape of the query and its
                 constraints, and validate the approach by extensive
                 experiments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data warehouses; distributed systems; minimum-size
                 viewsets; views",
  remark =       "Check month: April or May??",
}

@Article{Cao:2006:STD,
  author =       "Hu Cao and Ouri Wolfson and Goce Trajcevski",
  title =        "Spatio-temporal data reduction with deterministic
                 error bounds",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "3",
  pages =        "211--228",
  month =        apr,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:19 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A common way of storing spatio-temporal information
                 about mobile devices is in the form of a 3D (2D
                 geography + time) trajectory. We argue that when
                 cellular phones and Personal Digital Assistants become
                 location-aware, the size of the spatio-temporal
                 information generated may prohibit efficient
                 processing. We propose to adopt a technique studied in
                 computer graphics, namely line-simplification, as an
                 approximation technique to solve this problem. Line
                 simplification will reduce the size of the
                 trajectories. Line simplification uses a distance
                 function in producing the trajectory approximation. We
                 postulate the desiderata for such a distance-function:
                 it should be sound, namely the error of the answers to
                 spatio-temporal queries must be bounded. We analyze
                 several distance functions, and prove that some are
                 sound in this sense for some types of queries, while
                 others are not. A distance function that is sound for
                 all common spatio-temporal query types is introduced
                 and analyzed. Then we propose an aging mechanism which
                 gradually shrinks the size of the trajectories as time
                 progresses. We also propose to adopt existing
                 linguistic constructs to manage the uncertainty
                 introduced by the trajectory approximation. Finally, we
                 analyze experimentally the effectiveness of
                 line-simplification in reducing the size of a
                 trajectories database.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data reduction; line simplification; moving objects
                 database; uncertainty",
  remark =       "Check month: April or May??",
}
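
%%% Line simplification with a sound distance function can be sketched as
%%% Douglas--Peucker driven by a time-synchronized distance: compare each
%%% dropped vertex against the position the simplified segment assigns to
%%% the same timestamp. An illustrative Python sketch (the paper analyzes
%%% several distance functions; this shows only the mechanism):
%%%
%%%    import math
%%%
%%%    def time_sync_dist(p, a, b):
%%%        t, x, y = p
%%%        ta, xa, ya = a
%%%        tb, xb, yb = b
%%%        r = 0.0 if tb == ta else (t - ta) / (tb - ta)
%%%        return math.hypot(x - (xa + r * (xb - xa)),
%%%                          y - (ya + r * (yb - ya)))
%%%
%%%    def simplify(traj, eps):
%%%        """Drop a vertex only if no timestamp's position moves by more
%%%        than eps (the deterministic error bound)."""
%%%        if len(traj) < 3:
%%%            return traj[:]
%%%        d, idx = max((time_sync_dist(traj[i], traj[0], traj[-1]), i)
%%%                     for i in range(1, len(traj) - 1))
%%%        if d <= eps:
%%%            return [traj[0], traj[-1]]
%%%        return (simplify(traj[:idx + 1], eps)[:-1]
%%%                + simplify(traj[idx:], eps))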

@Article{Benetis:2006:NRN,
  author =       "Rimantas Benetis and S. Jensen and Gytis
                 Kar{\c{c}}iauskas and Simonas {\ocirc{S}}altenis",
  title =        "Nearest and reverse nearest neighbor queries for
                 moving objects",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "3",
  pages =        "229--249",
  month =        apr,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:19 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the continued proliferation of wireless
                 communications and advances in positioning
                 technologies, algorithms for efficiently answering
                 queries about large populations of moving objects are
                 gaining interest. This paper proposes algorithms for
                 $k$ nearest and reverse $k$ nearest neighbor queries on
                 the current and anticipated future positions of points
                 moving continuously in the plane. The former type of
                 query returns $k$ objects nearest to a query object for
                 each time point during a time interval, while the
                 latter returns the objects that have a specified query
                 object as one of their $k$ closest neighbors, again for
                 each time point during a time interval. In addition,
                 algorithms for so-called persistent and continuous
                 variants of these queries are provided. The algorithms
                 are based on the indexing of object positions
                 represented as linear functions of time. The results of
                 empirical performance experiments are reported.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "continuous queries; incremental update; location-based
                 services; mobile objects; neighbor queries; persistent
                 queries",
  remark =       "Check month: April or May??",
}
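
%%% With positions linear in time, p_i(t) = p_i + v_i t, the squared
%%% distance between two objects is quadratic in t, so the time of
%%% closest approach has a closed form. A Python sketch of this core
%%% computation behind the NN algorithms above (2D, hypothetical names):
%%%
%%%    def closest_time(p1, v1, p2, v2, t_start=0.0):
%%%        """Time >= t_start at which |p1(t) - p2(t)| is minimized."""
%%%        dp = (p1[0] - p2[0], p1[1] - p2[1])
%%%        dv = (v1[0] - v2[0], v1[1] - v2[1])
%%%        dvdv = dv[0] ** 2 + dv[1] ** 2
%%%        if dvdv == 0.0:
%%%            return t_start        # equal velocities: constant distance
%%%        t_star = -(dp[0] * dv[0] + dp[1] * dv[1]) / dvdv
%%%        return max(t_start, t_star)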

@Article{Pelleg:2006:DTS,
  author =       "Dan Pelleg and Andrew Moore",
  title =        "Dependency trees in sub-linear time and bounded
                 memory",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "3",
  pages =        "250--262",
  month =        apr,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:19 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We focus on the problem of efficient learning of
                 dependency trees. Once grown, they can be used as a
                 special case of a Bayesian network, for PDF
                 approximation, and for many other uses. Given the data,
                 a well-known algorithm can fit an optimal tree in time
                 that is quadratic in the number of attributes and
                 linear in the number of records. We show how to modify
                 it to exploit partial knowledge about edge weights.
                 Experimental results show running time that is
                 near-constant in the number of records, without
                 significant loss in accuracy of the generated trees.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data mining; dependency trees; fast algorithms;
                 probably approximately correct learning",
  remark =       "Check month: April or May??",
}
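
%%% The "well-known algorithm" referenced above is Chow--Liu: compute
%%% pairwise mutual information (quadratic in attributes, linear in
%%% records) and take a maximum spanning tree. A compact Python sketch of
%%% that baseline; the paper's contribution is running near-constant in
%%% the number of records by exploiting partial edge-weight knowledge:
%%%
%%%    import math
%%%    from itertools import combinations
%%%    from collections import Counter
%%%
%%%    def mutual_info(u, v):
%%%        n = len(u)
%%%        pu, pv, puv = Counter(u), Counter(v), Counter(zip(u, v))
%%%        return sum((c / n) * math.log((c / n)
%%%                   / ((pu[a] / n) * (pv[b] / n)))
%%%                   for (a, b), c in puv.items())
%%%
%%%    def chow_liu(columns):                  # columns[i] = attribute i
%%%        edges = sorted(((mutual_info(columns[i], columns[j]), i, j)
%%%                        for i, j in combinations(range(len(columns)), 2)),
%%%                       reverse=True)
%%%        parent = list(range(len(columns)))  # Kruskal max spanning tree
%%%        def find(x):
%%%            while parent[x] != x:
%%%                parent[x] = parent[parent[x]]
%%%                x = parent[x]
%%%            return x
%%%        tree = []
%%%        for w, i, j in edges:
%%%            ri, rj = find(i), find(j)
%%%            if ri != rj:
%%%                parent[ri] = rj
%%%                tree.append((i, j))
%%%        return tree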

@Article{Che:2006:QOX,
  author =       "Dunren Che and Karl Aberer and Tamer {\"O}zsu",
  title =        "Query optimization in {XML} structured-document
                 databases",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "3",
  pages =        "263--289",
  month =        apr,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:19 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "While the information published in the form of
                 XML-compliant documents keeps fast mounting up,
                 efficient and effective query processing and
                 optimization for XML have now become more important
                 than ever. This article reports our recent advances in
                 XML structured-document query optimization. In this
                 article, we elaborate on a novel approach and the
                 techniques developed for XML query optimization. Our
                 approach performs heuristic-based algebraic
                 transformations on XPath queries, represented as PAT
                 algebraic expressions, to achieve query optimization.
                 This article first presents a comprehensive set of
                 general equivalences with regard to XML documents and
                 XML queries. Based on these equivalences, we developed
                 a large set of deterministic algebraic transformation
                 rules for XML query optimization. Our approach is
                 unique, in that it performs exclusively deterministic
                 transformations on queries for fast optimization. The
                 deterministic nature of the proposed approach
                 straightforwardly renders high optimization efficiency
                 and simplicity in implementation. Our approach is a
                 logical-level one, which is independent of any
                 particular storage model. Therefore, the optimizers
                 developed based on our approach can be easily adapted
                 to a broad range of XML data/information servers to
                 achieve fast query optimization. Experimental study
                 confirms the validity and effectiveness of the proposed
                 approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "deterministic query optimization; query
                 transformation; XML database; XML query optimization;
                 XML query processing",
  remark =       "Check month: April or May??",
}

@Article{Ferrari:2006:GES,
  author =       "Elena Ferrari and Bhavani Thuraisingham",
  title =        "Guest editorial: special issue on privacy preserving
                 data management",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "4",
  pages =        "291--292",
  month =        nov,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:20 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  remark =       "Check month: April or November??",
}

@Article{Mukherjee:2006:PPT,
  author =       "Shibnath Mukherjee and Zhiyuan Chen and Aryya
                 Gangopadhyay",
  title =        "A privacy-preserving technique for {Euclidean}
                 distance-based mining algorithms using
                 {Fourier}-related transforms",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "4",
  pages =        "293--315",
  month =        nov,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:20 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Privacy preserving data mining has become increasingly
                 popular because it allows sharing of privacy-sensitive
                 data for analysis purposes. However, existing
                 techniques such as random perturbation do not fare well
                 for simple yet widely used and efficient Euclidean
                 distance-based mining algorithms. Although original
                 data distributions can be pretty accurately
                 reconstructed from the perturbed data, distances
                 between individual data points are not preserved,
                 leading to poor accuracy for the distance-based mining
                 methods. Besides, they do not generally focus on data
                 reduction. Other studies on secure multi-party
                 computation often concentrate on techniques useful to
                 very specific mining algorithms and scenarios such that
                 they require modification of the mining algorithms and
                 are often difficult to generalize to other mining
                 algorithms or scenarios. This paper proposes a novel
                 generalized approach using the well-known energy
                 compaction power of Fourier-related transforms to hide
                 sensitive data values and to approximately preserve
                 Euclidean distances in centralized and distributed
                 scenarios to a great degree of accuracy. Three
                 algorithms to select the most important transform
                 coefficients are presented, one for a centralized
                 database case, the second one for a horizontally
                 partitioned, and the third one for a vertically
                 partitioned database case. Experimental results
                 demonstrate the effectiveness of the proposed
                 approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data mining; Fourier transform; privacy",
  remark =       "Check month: September or November??",
}
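
%%% The energy-compaction idea above can be sketched in a few lines of
%%% numpy: pick the Fourier coefficient positions carrying the most
%%% energy across the data set and share only those. By Parseval's
%%% theorem, Euclidean distances computed on the retained coefficients
%%% approximate the original ones. A centralized-case sketch only; the
%%% paper's three selection algorithms are more refined:
%%%
%%%    import numpy as np
%%%
%%%    def fourier_summary(data, k):
%%%        """data: (rows, cols) real matrix; returns the k highest-energy
%%%        coefficient positions and each row's values at them."""
%%%        F = np.fft.fft(data, axis=1)
%%%        energy = (np.abs(F) ** 2).sum(axis=0)
%%%        keep = np.argsort(-energy)[:k]   # same positions for every row
%%%        return keep, F[:, keep]
%%%
%%% With numpy's DFT convention, ||x - y||^2 = (1/n) ||X - Y||^2, so a
%%% distance on the kept coefficients underestimates the true distance by
%%% exactly the energy of the discarded difference coefficients.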

@Article{Jiang:2006:SDF,
  author =       "Wei Jiang and Chris Clifton",
  title =        "A secure distributed framework for achieving
                 $k$-anonymity",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "4",
  pages =        "316--333",
  month =        nov,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:20 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "$k$-anonymity provides a measure of privacy protection
                 by preventing re-identification of data to groups of
                 fewer than $k$ data items. While algorithms exist for
                 producing $k$-anonymous data, the model has been that
                 of a single source wanting to publish data. Due to
                 privacy issues, it is common that data from different
                 sites cannot be shared directly. Therefore, this paper
                 presents a two-party framework along with an
                 application that generates $k$-anonymous data from two
                 vertically partitioned sources without disclosing data
                 from one site to the other. The framework is privacy
                 preserving in the sense that it satisfies the secure
                 definition commonly defined in the literature of Secure
                 Multiparty Computation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "anonymity; privacy; security",
  remark =       "Check month: April or November??",
}
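
%%% The property the two-party protocol above certifies -- without either
%%% site revealing its columns -- is ordinary k-anonymity of the joined
%%% table. For reference, the (non-private) check itself is simple:
%%%
%%%    from collections import Counter
%%%
%%%    def is_k_anonymous(rows, quasi_identifier_indexes, k):
%%%        """True iff every quasi-identifier value combination occurs in
%%%        at least k rows of the (virtually joined) table."""
%%%        counts = Counter(tuple(row[i] for i in quasi_identifier_indexes)
%%%                         for row in rows)
%%%        return all(c >= k for c in counts.values())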

@Article{Blanton:2006:SRF,
  author =       "Marina Blanton and Mikhail Atallah",
  title =        "Succinct representation of flexible and
                 privacy-preserving access rights",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "4",
  pages =        "334--354",
  month =        nov,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:20 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We explore the problem of portable and flexible
                 privacy preserving access rights that permit access to
                 a large collection of digital goods. {\em
                 Privacy-preserving\/} access control means that the
                 service provider can neither learn what access rights a
                 customer has nor link a request to access an item to a
                 particular customer, thus maintaining privacy of both
                 customer activity and customer access rights. {\em
                 Flexible\/} access rights allow a customer to choose a
                 subset of items or groups of items from the repository,
                 obtain access to and be charged only for the items
                 selected. And {\em portability\/} of access rights
                 means that the rights themselves can be stored on small
                 devices of limited storage space and computational
                 capabilities such as smartcards or sensors, and
                 therefore the rights must be enforced using the limited
                 resources available. In this paper, we present and
                 compare two schemes that address the problem of such
                 access rights. We show that much can be achieved if one
                 allows for even a negligible amount of false positives
                 --- items that were not requested by the customer, but
                 inadvertently were included in the customer access
                 right representation due to constrained space
                 resources. But minimizing false positives is only one
                 of several desiderata, which include protection against
                 sharing of false-positive information by unscrupulous
                 users, providing the users with transaction
                 untraceability and unlinkability, and forward
                 compatibility of the scheme. Our first scheme does not
                 place any constraints on the amount of space available
                 on the limited-capacity storage device, and searches
                 for the best representation that meets the
                 requirements. The second scheme, on the other hand, has
                 (modest) requirements on the storage space available,
                 but guarantees a low rate of false positives: with
                 $O(mc)$ storage space available on the smartcard (where
                 $m$ is the number of items or groups of items included
                 in the subscription and $c$ is a selectable parameter),
                 it achieves a rate of false positives of $m^{-c}$.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "compact representation; flexible access rights;
                 privacy-preserving access rights",
  remark =       "Check month: April or November??",
}
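
%%% A worked instance of the storage/false-positive trade-off in the
%%% second scheme: with $m = 16$ subscribed items or groups and parameter
%%% $c = 2$, the card stores $O(mc) = O(32)$ units, and the
%%% false-positive rate is $m^{-c} = 16^{-2} = 1/256$, i.e., about 0.4%.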

@Article{Domingo-Ferrer:2006:EMD,
  author =       "Josep Domingo-Ferrer and Antoni
                 Mart{\'\i}nez-Ballest{\'e} and Josep Maria Mateo-Sanz
                 and Francesc Seb{\'e}",
  title =        "Efficient multivariate data-oriented
                 microaggregation",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "4",
  pages =        "355--369",
  month =        nov,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:20 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Microaggregation is a family of methods for
                 statistical disclosure control (SDC) of microdata
                 (records on individuals and/or companies), that is, for
                 masking microdata so that they can be released while
                 preserving the privacy of the underlying individuals.
                 The principle of microaggregation is to aggregate
                 original database records into small groups prior to
                 publication. Each group should contain at least $k$
                 records to prevent disclosure of individual
                 information, where $k$ is a constant value preset by
                 the data protector. Recently, microaggregation has been
                 shown to be useful to achieve $k$-anonymity, in
                 addition to it being a good masking method. Optimal
                 microaggregation (with minimum within-groups
                 variability loss) can be computed in polynomial time
                 for univariate data. Unfortunately, for multivariate
                 data it is an NP-hard problem. Several heuristic
                 approaches to microaggregation have been proposed in
                 the literature. Heuristics yielding groups with fixed
                 size $k$ tend to be more efficient, whereas
                 data-oriented heuristics yielding variable group sizes
                 tend to result in lower information loss. This paper
                 presents new data-oriented heuristics which improve on
                 the trade-off between computational complexity and
                 information loss and are thus usable for large
                 datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "anonymity; microaggregation; microdata protection;
                 privacy; statistical databases; statistical disclosure
                 control",
  remark =       "Check month: April or November??",
}
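
%%% A minimal fixed-size microaggregation heuristic, for contrast with
%%% the paper's variable-size, data-oriented ones: order records along
%%% the first principal axis, group $k$ consecutive records, and publish
%%% group centroids. A Python sketch (names are illustrative):
%%%
%%%    import numpy as np
%%%
%%%    def fixed_size_microaggregate(data, k):
%%%        centered = data - data.mean(axis=0)
%%%        _, _, vt = np.linalg.svd(centered, full_matrices=False)
%%%        order = np.argsort(centered.dot(vt[0]))  # 1st principal axis
%%%        groups = [order[i:i + k] for i in range(0, len(order), k)]
%%%        if len(groups) > 1 and len(groups[-1]) < k:
%%%            groups[-2] = np.concatenate([groups[-2], groups[-1]])
%%%            groups.pop()                         # keep every group >= k
%%%        out = data.astype(float).copy()
%%%        for grp in groups:
%%%            out[grp] = data[grp].mean(axis=0)    # replace by centroid
%%%        return out
%%%
%%% Every published record then coincides with at least $k - 1$ others,
%%% which is what links microaggregation to $k$-anonymity.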

@Article{Massacci:2006:HHD,
  author =       "Fabio Massacci and John Mylopoulos and Nicola
                 Zannone",
  title =        "Hierarchical {Hippocratic} databases with minimal
                 disclosure for virtual organizations",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "4",
  pages =        "370--387",
  month =        nov,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:20 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The protection of customer privacy is a fundamental
                 issue in today's corporate marketing
                 strategies. Not surprisingly, many research efforts
                 have proposed new privacy-aware technologies. Among
                 them, Hippocratic databases offer mechanisms for
                 enforcing privacy rules in database systems for
                 inter-organizational business processes (also known as
                 virtual organizations). This paper extends these
                 mechanisms to allow for hierarchical purposes,
                 distributed authorizations and minimal disclosure
                 supporting the business processes of virtual
                 organizations that want to offer their clients a number
                 of ways to fulfill a service. Specifically, we use a
                 goal-oriented approach to analyze privacy policies of
                 the enterprises involved in a business process. On the
                 basis of the purpose hierarchy derived through a goal
                 refinement process, we provide algorithms for
                 determining the minimum set of authorizations needed to
                 achieve a service. This allows us to automatically
                 derive access control policies for an
                 inter-organizational business process from the
                 collection of privacy policies associated with
                 different participating enterprises. By using effective
                 on-line algorithms, the derivation of such minimal
                 information can also be done on-the-fly by the customer
                 wishing to access a service.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access control; delegation; information security;
                 minimal disclosure; privacy protection; private data
                 management; virtual organizations",
  remark =       "Check month: April or November??",
}

@Article{Xiong:2006:PLM,
  author =       "Hui Xiong and Michael Steinbach and Vipin Kumar",
  title =        "Privacy leakage in multi-relational databases: a
                 semi-supervised learning perspective",
  journal =      j-VLDB-J,
  volume =       "15",
  number =       "4",
  pages =        "388--402",
  month =        nov,
  year =         "2006",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:20 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In multi-relational databases, a view, which is a
                 context- and content-dependent subset of one or more
                 tables (or other views), is often used to preserve
                 privacy by hiding sensitive information. However,
                 recent developments in data mining present a new
                 challenge for database security even when traditional
                 database security techniques, such as database access
                 control, are employed. This paper presents a data
                 mining framework using semi-supervised learning that
                 demonstrates the potential for privacy leakage in
                 multi-relational databases. Many different types of
                 semi-supervised learning techniques, such as the
                 K-nearest neighbor (KNN) method, can be used to
                 demonstrate privacy leakage. However, we also introduce
                 a new approach to semi-supervised learning, hyperclique
                 pattern-based semi-supervised learning (HPSL), which
                 differs from traditional semi-supervised learning
                 approaches in that it considers the similarity among
                 groups of objects instead of only pairs of objects. Our
                 experimental results show that both the KNN and HPSL
                 methods have the ability to compromise database
                 security, although the HPSL is better at this privacy
                 violation (has higher prediction accuracy) than the KNN
                 method. Finally, we provide a principle for avoiding
                 privacy leakage in multi-relational databases via
                 semi-supervised learning and illustrate this principle
                 with a simple preventive technique whose effectiveness
                 is demonstrated by experiments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  remark =       "Check month: April or November??",
}

@Article{Haas:2007:SIB,
  author =       "Laura M. Haas and Christian S. Jensen and Martin L.
                 Kersten",
  title =        "Special issue: best papers of {VLDB} 2005",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "1",
  pages =        "1--3",
  month =        jan,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:22 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Godfrey:2007:AAM,
  author =       "Parke Godfrey and Ryan Shipley and Jarek Gryz",
  title =        "Algorithms and analyses for maximal vector
                 computation",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "1",
  pages =        "5--28",
  month =        jan,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:22 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The maximal vector problem is to identify the maximals
                 over a collection of vectors. This arises in many
                 contexts and, as such, has been well studied. The
                 problem recently gained renewed attention with skyline
                 queries for relational databases and with work to
                 develop skyline algorithms that are external and
                 relationally well behaved. While many algorithms have
                 been proposed, how they perform has been unclear. We
                 study the performance of, and design choices behind,
                 these algorithms. We prove runtime bounds based on the
                 number of vectors $N$ and the dimensionality $K$. Early
                 algorithms based on {\em divide and conquer\/}
                 established seemingly good average and worst-case
                 asymptotic runtimes. In fact, the problem can be solved
                 in $\mathcal{O}(KN)$ average-case time (holding $K$ fixed).
                 We prove, however, that the performance is quite bad
                 with respect to $K$. We demonstrate that the more
                 recent skyline algorithms are better behaved, and can
                 also achieve $\mathcal{O}(KN)$ average-case. While $K$
                 matters for these in practice, its effect vanishes
                 asymptotically. We introduce a new external algorithm,
                 LESS, that is more efficient and better behaved. We
                 evaluate LESS's effectiveness and improvement
                 over the field, and prove that its average-case running
                 time is $\mathcal{O}(KN)$.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
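
%%% For concreteness, the maximal-vector (skyline) computation studied
%%% above, in its simplest in-memory form -- a quadratic-time dominance
%%% filter, far from LESS's external, $\mathcal{O}(KN)$-average-case
%%% design:
%%%
%%%    def skyline(vectors):
%%%        """Return the maximals: vectors dominated by no other vector."""
%%%        def dominates(a, b):      # a >= b everywhere, > somewhere
%%%            return (all(x >= y for x, y in zip(a, b)) and
%%%                    any(x > y for x, y in zip(a, b)))
%%%        result = []
%%%        for v in vectors:
%%%            if any(dominates(w, v) for w in result):
%%%                continue
%%%            result = [w for w in result if not dominates(v, w)]
%%%            result.append(v)
%%%        return result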

@Article{Larson:2007:VMO,
  author =       "Per-{\AA}ke Larson and Jingren Zhou",
  title =        "View matching for outer-join views",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "1",
  pages =        "29--53",
  month =        jan,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:22 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Prior work on computing queries from materialized
                 views has focused on views defined by expressions
                 consisting of selection, projection, and inner joins,
                 with an optional aggregation on top (SPJG views). This
                 paper provides a view matching algorithm for views that
                 may also contain outer joins (SPOJG views). The
                 algorithm relies on a normal form for outer-join
                 expressions and is not based on bottom-up syntactic
                 matching of expressions. It handles any combination of
                 inner and outer joins, deals correctly with SQL bag
                 semantics, and exploits not-null constraints,
                 uniqueness constraints and foreign key constraints.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "aggregation; materialized views; outer joins; query
                 processing; view matching",
}

@Article{Markl:2007:CSE,
  author =       "V. Markl and P. J. Haas and M. Kutsch and N. Megiddo
                 and U. Srivastava and T. M. Tran",
  title =        "Consistent selectivity estimation via maximum
                 entropy",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "1",
  pages =        "55--76",
  month =        jan,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:22 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Cost-based query optimizers need to estimate the
                 selectivity of conjunctive predicates when comparing
                 alternative query execution plans. To this end,
                 advanced optimizers use multivariate statistics to
                 improve information about the joint distribution of
                 attribute values in a table. The joint distribution for
                 all columns is almost always too large to store
                 completely, and the resulting use of partial
                 distribution information raises the possibility that
                 multiple, non-equivalent selectivity estimates may be
                 available for a given predicate. Current optimizers use
                 cumbersome ad hoc methods to ensure that selectivities
                 are estimated in a consistent manner. These methods
                 ignore valuable information and tend to bias the
                 optimizer toward query plans for which the least
                 information is available, often yielding poor results.
                 In this paper we present a novel method for consistent
                 selectivity estimation based on the principle of
                 maximum entropy (ME). Our method exploits all available
                 information and avoids the bias problem. In the absence
                 of detailed knowledge, the ME approach reduces to
                 standard uniformity and independence assumptions.
                 Experiments with our prototype implementation in DB2
                 UDB show that use of the ME approach can improve the
                 optimizer's cardinality estimates by orders of
                 magnitude, resulting in better plan quality and
                 significantly reduced query execution times. For almost
                 all queries, these improvements are obtained while
                 adding only tens of milliseconds to the overall time
                 required for query optimization.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
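
%%% The maximum-entropy estimation above can be prototyped with iterative
%%% proportional fitting over the 2^n truth assignments of n predicates:
%%% start uniform and rescale until every known selectivity holds. A toy
%%% Python sketch, assuming consistent constraints strictly between 0 and
%%% 1 over nonempty predicate sets (the DB2 machinery is, of course, far
%%% more sophisticated):
%%%
%%%    import itertools
%%%
%%%    def max_entropy_atoms(constraints, n, iters=500):
%%%        """constraints: {(0,): 0.1, (1,): 0.2, (0, 1): 0.05, ...} maps
%%%        a tuple of predicate indexes to the selectivity of their
%%%        conjunction. Returns probabilities of all 2^n atoms."""
%%%        atoms = {a: 1.0 / 2 ** n
%%%                 for a in itertools.product((0, 1), repeat=n)}
%%%        for _ in range(iters):
%%%            for preds, sel in constraints.items():
%%%                mass = sum(p for a, p in atoms.items()
%%%                           if all(a[i] for i in preds))
%%%                if not 0.0 < mass < 1.0:
%%%                    continue
%%%                for a in atoms:
%%%                    inside = all(a[i] for i in preds)
%%%                    atoms[a] *= (sel / mass if inside
%%%                                 else (1 - sel) / (1 - mass))
%%%        return atoms
%%%
%%% Any other conjunction's selectivity is then read off by summing the
%%% matching atoms, and it is consistent with all inputs by construction.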

@Article{Ghoting:2007:CCF,
  author =       "Amol Ghoting and Gregory Buehrer and Srinivasan
                 Parthasarathy and Daehyun Kim and Anthony Nguyen and
                 Yen-Kuang Chen and Pradeep Dubey",
  title =        "Cache-conscious frequent pattern mining on modern and
                 emerging processors",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "1",
  pages =        "77--96",
  month =        jan,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:22 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Algorithms are typically designed to exploit the
                 current state of the art in processor technology.
                 However, as processor technology evolves, said
                 algorithms are often unable to derive the maximum
                 achievable performance on these modern architectures.
                 In this paper, we examine the performance of frequent
                 pattern mining algorithms on a modern processor. A
                 detailed performance study reveals that even the best
                 frequent pattern mining implementations, with highly
                 efficient memory managers, still grossly under-utilize
                 a modern processor. The primary performance bottlenecks
                 are {\em poor data locality\/} and {\em low instruction
                 level parallelism (ILP)}. We propose a {\em
                 cache-conscious prefix tree\/} to address this problem.
                 The resulting tree improves spatial locality and also
                 enhances the benefits from hardware cache line
                 prefetching. Furthermore, the design of this data
                 structure allows the use of {\em path tiling}, a novel
                 tiling strategy, to improve temporal locality. The
                  result is an overall speedup of up to 3.2 when compared
                  with state-of-the-art implementations. We then show how
                  these algorithms can be improved further by realizing a
                  non-naive thread-based decomposition that targets {\em
                  simultaneously multi-threaded (SMT) processors}. A key
                 aspect of this decomposition is to ensure cache re-use
                 between threads that are co-scheduled at a fine
                 granularity. This optimization affords an additional
                 speedup of 50\%, resulting in an overall speedup of up
                 to 4.8. The proposed optimizations also provide
                 performance improvements on SMPs, and will most likely
                 be beneficial on emerging processors.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "architecture-conscious algorithms; association rule
                 mining; cache-conscious data mining; frequent itemset
                 mining; frequent pattern mining",
}

@Article{Lee:2007:ETS,
  author =       "Yoonkyong Lee and Mayssam Sayyadian and AnHai Doan and
                 Arnon S. Rosenthal",
  title =        "{eTuner}: tuning schema matching software using
                 synthetic scenarios",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "1",
  pages =        "97--122",
  month =        jan,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:22 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Most recent schema matching systems assemble {\em
                 multiple components}, each employing a particular
                 matching technique. The domain user must then {\em
                  tune\/} the system: select the right components to be
                  executed and correctly adjust their numerous ``knobs''
                  (e.g., thresholds, formula coefficients). Tuning is
                  skill- and time-intensive, but (as we show) without it
                 the matching accuracy is significantly inferior. We
                 describe eTuner, an approach to {\em automatically\/}
                 tune schema matching systems. Given a schema $S$, we
                 match $S$ against synthetic schemas, for which the
                 ground truth mapping is known, and find a tuning that
                 demonstrably improves the performance of matching $S$
                 against real schemas. To efficiently search the huge
                 space of tuning configurations, eTuner works
                 sequentially, starting with tuning the lowest level
                 components. To increase the applicability of eTuner, we
                 develop methods to tune a broad range of matching
                 components. While the tuning process is completely
                 automatic, eTuner can also exploit user assistance
                 (whenever available) to further improve the tuning
                 quality. We employed eTuner to tune four recently
                 developed matching systems on several real-world
                 domains. The results show that eTuner produced tuned
                  matching systems that achieve higher accuracy than
                  the same systems tuned with currently available
                  methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "compositional approach; machine learning; schema
                 matching; synthetic schemas; tuning",
}
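
%%% The staged search that eTuner performs can be pictured as greedy,
%%% one-knob-at-a-time optimization, scored against synthetic scenarios
%%% whose ground-truth mapping is known.  A minimal sketch (the names and
%%% the accuracy callback are hypothetical, not eTuner's API):
%%%
%%%     def tune(initial, candidates, accuracy):
%%%         """initial: knob name -> value; candidates: name -> values;
%%%         accuracy: config -> score on synthetic matching scenarios."""
%%%         config = dict(initial)
%%%         for name, values in candidates.items():  # lowest level first
%%%             config[name] = max(
%%%                 values, key=lambda v: accuracy({**config, name: v}))
%%%         return config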

@Article{Burdick:2007:OUI,
  author =       "Doug Burdick and Prasad M. Deshpande and T. S. Jayram
                 and Raghu Ramakrishnan and Shivakumar Vaithyanathan",
  title =        "{OLAP} over uncertain and imprecise data",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "1",
  pages =        "123--144",
  month =        jan,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:22 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We extend the OLAP data model to represent data
                 ambiguity, specifically imprecision and uncertainty,
                 and introduce an allocation-based approach to the
                 semantics of aggregation queries over such data. We
                 identify three natural query properties and use them to
                 shed light on alternative query semantics. While there
                 is much work on representing and querying ambiguous
                 data, to our knowledge this is the first paper to
                 handle both imprecision and uncertainty in an OLAP
                 setting.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "aggregation; ambiguous; imprecision; uncertainty",
}
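
%%% The allocation-based semantics can be illustrated compactly: an
%%% imprecise fact is split across the cells it may belong to, with
%%% weights summing to one, and an aggregate such as SUM weighs each
%%% contribution accordingly.  A toy sketch (the weights are assumed;
%%% the paper develops the full semantics):
%%%
%%%     # Each fact: (value, {cell: allocation weight}); weights sum to 1.
%%%     facts = [
%%%         (100.0, {("East", "NY"): 0.6, ("East", "Boston"): 0.4}),
%%%         (50.0,  {("East", "NY"): 1.0}),             # precise fact
%%%     ]
%%%
%%%     def cell_sum(cell):
%%%         return sum(v * w.get(cell, 0.0) for v, w in facts)
%%%
%%%     print(cell_sum(("East", "NY")))  # 0.6 * 100 + 50 = 110.0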

@Article{Haftmann:2007:FER,
  author =       "Florian Haftmann and Donald Kossmann and Eric Lo",
  title =        "A framework for efficient regression tests on database
                 applications",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "1",
  pages =        "145--164",
  month =        jan,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:22 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Regression testing is an important software
                  maintenance activity to ensure the integrity of
                 software after modification. However, most methods and
                 tools developed for software testing today do not work
                 well for database applications; these tools only work
                 well if applications are stateless or tests can be
                 designed in such a way that they do not alter the
                 state. To execute tests for database applications
                 efficiently, the challenge is to control the state of
                 the database during testing and to order the test runs
                 such that expensive database {\em reset\/} operations
                 that bring the database into the right state need to be
                 executed as seldom as possible. This work devises a
                 regression testing framework for database applications
                 so that test runs can be executed in parallel. The goal
                 is to achieve linear speed-up and/or exploit the
                 available resources as well as possible. This problem
                 is challenging because parallel testing needs to
                 consider both load balancing and controlling the state
                 of the database. Experimental results show that test
                 run execution can achieve linear speed-up by using the
                 proposed framework.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database applications; regression tests",
}

@Article{Tanin:2007:UDQ,
  author =       "Egemen Tanin and Aaron Harwood and Hanan Samet",
  title =        "Using a distributed quadtree index in peer-to-peer
                 networks",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "2",
  pages =        "165--178",
  month =        apr,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:23 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Peer-to-peer (P2P) networks have become a powerful
                 means for online data exchange. Currently, users are
                 primarily utilizing these networks to perform
                 exact-match queries and retrieve complete files.
                  However, future, more data-intensive applications, such
                  as P2P auction networks, P2P job-search networks, and
                  P2P multiplayer games, will require the capability to
                 respond to more complex queries such as range queries
                 involving numerous data types including those that have
                  a spatial component. In this paper, a distributed
                  quadtree index that adapts the MX-CIF quadtree is
                  described; it enables more powerful access to data
                  in P2P networks. This index has been implemented for
                 various prototype P2P applications and results of
                 experiments are presented. Our index is easy to use,
                 scalable, and exhibits good load-balancing properties.
                 Similar indices can be constructed for various
                 multidimensional data types with both spatial and
                 non-spatial components.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "distributed data structures; peer-to-peer networks;
                 quadtrees; spatial data structures",
}
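
%%% The mapping at the heart of such an index can be sketched briefly:
%%% name each quadtree cell by its root-to-cell path and hash the name
%%% onto a peer, so any node can locate the peer responsible for a
%%% region without central coordination.  This shows only the flavor of
%%% the idea; the MX-CIF adaptation and load balancing are in the paper.
%%%
%%%     import hashlib
%%%
%%%     def cell_of(x, y, level):
%%%         """Quadrant path of point (x, y) in the unit square."""
%%%         path, x0, y0, size = [], 0.0, 0.0, 1.0
%%%         for _ in range(level):
%%%             size /= 2.0
%%%             qx = int(x >= x0 + size)
%%%             qy = int(y >= y0 + size)
%%%             path.append(str(2 * qy + qx))
%%%             x0 += qx * size
%%%             y0 += qy * size
%%%         return "".join(path)
%%%
%%%     def peer_for(cell, n_peers):
%%%         digest = hashlib.sha1(cell.encode()).hexdigest()
%%%         return int(digest, 16) % n_peers
%%%
%%%     print(peer_for(cell_of(0.7, 0.2, 4), n_peers=64))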

@Article{Viqueira:2007:SES,
  author =       "Jose R. Rios Viqueira and Nikos A. Lorentzos",
  title =        "{SQL} extension for spatio-temporal data",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "2",
  pages =        "179--200",
  month =        apr,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:23 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "An SQL extension is formalized for the management of
                 spatio-temporal data, i.e. of spatial data that evolves
                 with respect to time. The extension is dedicated to
                 applications such as topography, cartography, and
                 cadastral systems, hence it considers discrete changes
                 both in space and in {\em time}. It is based on the
                 rigid formalization of data types and of SQL
                 constructs. Data types are defined in terms of time and
                 {\em spatial quanta}. The SQL constructs are defined in
                 terms of a kernel of {\em few\/} relational algebra
                 operations, composed of the well-known operations of
                 the 1NF model and of two more, {\em Unfold\/} and {\em
                 Fold}. In conjunction with previous work, it enables
                 the uniform management of 1NF structures that may
                 contain not only spatio-temporal but also either purely
                 temporal or purely spatial or conventional data. The
                  syntax and semantics of the extension are fully
                 consistent with the {SQL:2003} standard.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data modelling; spatial databases; spatio-temporal
                 databases; SQL",
}

@Article{Dai:2007:CDC,
  author =       "Bi-Ru Dai and Cheng-Ru Lin and Ming-Syan Chen",
  title =        "Constrained data clustering by depth control and
                 progressive constraint relaxation",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "2",
  pages =        "201--217",
  month =        apr,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:23 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In order to import the domain knowledge or
                 application-dependent parameters into the data mining
                 systems, constraint-based mining has attracted a lot of
                 research attention recently. In this paper, the
                 attributes employed to model the constraints are called
                 constraint attributes and those attributes involved in
                 the objective function to be optimized are called
                 optimization attributes. The constrained clustering
                 considered in this paper is conducted in such a way
                 that the objective function of optimization attributes
                 is optimized subject to the condition that the imposed
                 constraint is satisfied. Explicitly, we address the
                 problem of constrained clustering with numerical
                 constraints, in which the constraint attribute values
                 of any two data items in the same cluster are required
                 to be within the corresponding constraint range. This
                 numerical constrained clustering problem, however,
                 cannot be dealt with by any conventional clustering
                 algorithms. Consequently, we devise several effective
                 and efficient algorithms to solve such a clustering
                  problem. It is noted that due to the intrinsic nature
                  of numerical constrained clustering, the process of
                  forming the clusters is order-dependent, which in many
                  cases degrades the clustering results. In view of
                  this, we devise a {\em progressive
                 constraint relaxation\/} technique to remedy this
                 drawback and improve the overall performance of
                  clustering results. Explicitly, by using a smaller
                  (tighter) constraint range in earlier merge iterations,
                  we have more room to relax the constraint and seek
                  better solutions in subsequent iterations.
                 It is empirically shown that the progressive constraint
                 relaxation technique is able to improve not only the
                 execution efficiency but also the clustering quality.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "constrained clustering; data clustering; data mining",
}

@Article{Shen:2007:ADD,
  author =       "Heng Tao Shen and Xiaofang Zhou and Aoying Zhou",
  title =        "An adaptive and dynamic dimensionality reduction
                 method for high-dimensional indexing",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "2",
  pages =        "219--234",
  month =        apr,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:23 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The notorious ``dimensionality curse'' is a well-known
                  phenomenon for any multi-dimensional index attempting
                 to scale up to high dimensions. One well-known approach
                 to overcome degradation in performance with respect to
                 increasing dimensions is to reduce the dimensionality
                 of the original dataset before constructing the index.
                 However, identifying the correlation among the
                 dimensions and effectively reducing them are
                 challenging tasks. In this paper, we present an
                 adaptive {\em Multi-level Mahalanobis-based
                 Dimensionality Reduction\/} (MMDR) technique for
                 high-dimensional indexing. Our MMDR technique has four
                 notable features compared to existing methods. First,
                 it discovers elliptical clusters for more effective
                 dimensionality reduction by using only the
                 low-dimensional subspaces. Second, data points in the
                 different axis systems are indexed using a single
                 $B^+$-tree. Third, our technique is highly scalable in
                 terms of data size and dimension. Finally, it is also
                 dynamic and adaptive to insertions. An extensive
                 performance study was conducted using both real and
                 synthetic datasets, and the results show that our
                 technique not only achieves higher precision, but also
                 enables queries to be processed efficiently.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "correlated clustering; dimensionality reduction;
                 high-dimensional indexing; projection; subspace",
}
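
%%% The distance underlying the elliptical clusters is the Mahalanobis
%%% distance, which rescales each direction by the cluster's covariance.
%%% A minimal rendering (illustrative only, not the MMDR algorithm):
%%%
%%%     import numpy as np
%%%
%%%     def mahalanobis(x, mean, cov):
%%%         d = np.asarray(x, float) - np.asarray(mean, float)
%%%         return float(np.sqrt(d @ np.linalg.inv(cov) @ d))
%%%
%%%     cov = np.array([[4.0, 0.0], [0.0, 1.0]])  # elongated along axis 0
%%%     print(mahalanobis([2.0, 0.0], [0.0, 0.0], cov))  # 1.0, not 2.0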

@Article{He:2007:PCC,
  author =       "Zhen He and Alonso Marquez",
  title =        "Path and cache conscious prefetching {(PCCP)}",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "2",
  pages =        "235--249",
  month =        apr,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:23 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Main memory cache performance continues to play an
                 important role in determining the overall performance
                 of object-oriented, object-relational and XML
                 databases. An effective method of improving main memory
                 cache performance is to prefetch or pre-load pages in
                  advance of their usage, in anticipation of main memory
                 cache misses. In this paper we describe a framework for
                 creating prefetching algorithms with the novel features
                 of path and cache consciousness. Path consciousness
                 refers to the use of short sequences of object
                 references at key points in the reference trace to
                 identify paths of navigation. Cache consciousness
                 refers to the use of historical page access knowledge
                 to guess which pages are likely to be main memory cache
                 resident most of the time and then assumes these pages
                 do not exist in the context of prefetching. We have
                 conducted a number of experiments comparing our
                 approach against four highly competitive prefetching
                  algorithms. The results show that our approach
                  outperforms existing prefetching techniques in some
                  situations while performing worse in others. We
                  provide guidelines as to when our algorithm should be
                  used and when others may be more desirable.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "caching; clustering; databases; prefetching",
}

@Article{Yu:2007:MBS,
  author =       "Hailing Yu and Divyakant Agrawal and Amr {El
                 Abbadi}",
  title =        "{MEMS} based storage architecture for relational
                 databases",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "2",
  pages =        "251--268",
  month =        apr,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:23 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Due to recent advances in semiconductor manufacturing,
                 the gap between main memory and disks is constantly
                 increasing. This leads to a significant performance
                 bottleneck for Relational Database Management Systems.
                 Recent advances in nanotechnology have led to the
                 invention of MicroElectroMechanical Systems (MEMS)
                 based storage technology to replace disks. In this
                 paper, we exploit the physical characteristics of
                 MEMS-based storage devices to develop a placement
                 scheme for relational data that enables retrieval in
                 both row-wise and column-wise manner. We develop
                 algorithms for different relational operations based on
                 this data layout. Our experimental results and analysis
                 demonstrate that this data layout not only improves I/O
                  utilization, but also results in better cache performance
                 for a variety of different relational operations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data placement; MEMS; relational databases; storage",
}

@Article{Yiannis:2007:CTF,
  author =       "John Yiannis and Justin Zobel",
  title =        "Compression techniques for fast external sorting",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "2",
  pages =        "269--291",
  month =        apr,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:23 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "External sorting of large files of records involves
                 use of disk space to store temporary files, processing
                 time for sorting, and transfer time between CPU, cache,
                 memory, and disk. Compression can reduce disk and
                 transfer costs, and, in the case of external sorts, cut
                 merge costs by reducing the number of runs. It is
                 therefore plausible that overall costs of external
                 sorting could be reduced through use of compression. In
                 this paper, we propose new compression techniques for
                 data consisting of sets of records. The best of these
                 techniques, based on building a trie of variable-length
                 common strings, provides fast compression and
                 decompression and allows random access to individual
                 records. We show experimentally that our trie-based
                 compression leads to significant reduction in sorting
                 costs; that is, it is faster to compress the data, sort
                 it, and then decompress it than to sort the
                 uncompressed data. While the degree of compression is
                 not quite as great as can be obtained with adaptive
                 techniques such as Lempel--Ziv methods, these cannot be
                 applied to sorting. Our experiments show that, in
                 comparison to approaches such as Huffman coding of
                 fixed-length substrings, our novel trie-based method is
                 faster and provides greater size reductions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "external sorting; query evaluation; semi-static
                 compression; sorting",
}
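
%%% The property that makes compressed sorting work, as the abstract
%%% notes, is that records stay individually compressible and decodable.
%%% A toy semi-static scheme in that spirit, using a shared dictionary
%%% of frequent 4-grams (this is not the paper's variable-length trie
%%% coder):
%%%
%%%     from collections import Counter
%%%
%%%     def build_dict(records, max_entries=255):
%%%         grams = Counter()
%%%         for r in records:
%%%             for i in range(len(r) - 3):
%%%                 grams[r[i:i + 4]] += 1
%%%         return {g: j for j, (g, _) in
%%%                 enumerate(grams.most_common(max_entries))}
%%%
%%%     def compress(record, d):
%%%         out, i = [], 0
%%%         while i < len(record):
%%%             g = record[i:i + 4]
%%%             if g in d:
%%%                 out.append(chr(0xE000 + d[g]))  # private-use escape
%%%                 i += 4
%%%             else:
%%%                 out.append(record[i])
%%%                 i += 1
%%%         return "".join(out)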

@Article{Jermaine:2007:PEF,
  author =       "Christopher Jermaine and Edward Omiecinski and Wai Gen
                 Yee",
  title =        "The partitioned exponential file for database storage
                 management",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "4",
  pages =        "417--437",
  month =        oct,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:25 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The rate of increase in hard disk storage capacity
                 continues to outpace the rate of decrease in hard disk
                 seek time. This trend implies that the value of a seek
                 is increasing exponentially relative to the value of
                 storage.\par

                 With this trend in mind, we introduce the partitioned
                 exponential file (PE file) which is a generic storage
                 manager that can be customized for many different types
                 of data (e.g., numerical, spatial, or temporal). The PE
                 file is intended for use in environments with intense
                 update loads and concurrent, analytic queries. Such an
                 environment may be found, for example, in long-running
                 scientific applications which can produce petabytes of
                 data. For example, the proposed Large Synoptic Survey
                  Telescope [36] will produce 50--100 petabytes of
                 observational, scientific data over its multi-year
                 lifetime. This database will never be taken off-line,
                 so bursty update loads of tens of terabytes per day
                 must be handled concurrently with data analysis. In the
                 PE file, data are organized as a series of on-disk
                 sorts with a careful, global organization. Because the
                 PE file relies heavily on sequential I/O, only a
                 fraction of a disk seek is required for a typical
                 record insertion or retrieval.\par

                 In addition to describing the PE file, we also detail a
                 set of benchmarking experiments for T1SM, which is a PE
                 file customized for use with multi-attribute data
                 records ordered on a single numerical attribute. In our
                 benchmarking, we implement and test many competing data
                 organizations that can be used to index and store such
                  data, such as the $B^+$-Tree, the LSM-Tree, the Buffer
                 Tree, the Stepped Merge Method, and the Y-Tree. As
                 expected, no organization is the best over all
                 benchmarks, but our experiments show that T1SM is the
                 best choice in many situations, suggesting that it is
                 the best overall. Specifically, T1SM performs
                 exceptionally well in the case of a heavy query
                 workload that must be handled concurrently with an
                 intense insertion stream. Our experiments show that
                 T1SM (and its close cousin, the T2SM storage manager
                 for spatial data) can handle very heavy mixed workloads
                 of this type, and still maintain acceptably small query
                 latencies.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data warehousing; indexing; storage management",
}
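
%%% The organizing idea shared by the PE file and the LSM-style
%%% competitors in the benchmark is to buffer insertions and write them
%%% as sorted runs that are merged sequentially, trading seeks for
%%% sequential I/O.  A toy in-memory rendering (not the PE file's
%%% partitioned, exponential layout):
%%%
%%%     import heapq
%%%
%%%     class RunStore:
%%%         def __init__(self, buf_limit=1024, run_limit=4):
%%%             self.buf, self.runs = [], []
%%%             self.buf_limit, self.run_limit = buf_limit, run_limit
%%%
%%%         def insert(self, key, value):
%%%             self.buf.append((key, value))
%%%             if len(self.buf) >= self.buf_limit:
%%%                 self.runs.append(sorted(self.buf))
%%%                 self.buf = []
%%%                 if len(self.runs) > self.run_limit:
%%%                     self.runs = [list(heapq.merge(*self.runs))]
%%%
%%%         def lookup(self, key):
%%%             for run in [sorted(self.buf)] + self.runs[::-1]:
%%%                 for k, v in run:        # binary search in practice
%%%                     if k == key:
%%%                         return v
%%%             return None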

@Article{Deligiannakis:2007:DCH,
  author =       "Antonios Deligiannakis and Yannis Kotidis and Nick
                 Roussopoulos",
  title =        "Dissemination of compressed historical information in
                 sensor networks",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "4",
  pages =        "439--461",
  month =        oct,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:25 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Sensor nodes are small devices that `measure' their
                 environment and communicate feeds of low-level data
                 values to a base station for further processing and
                 archiving. Dissemination of these multi-valued feeds is
                 challenging because of the limited resources
                 (processing, bandwidth, energy) available in the nodes
                 of the network. In this paper, we first describe the
                 SBR algorithm for compressing multi-valued feeds
                 containing historical data from each sensor. The key to
                 our technique is the base signal, a series of values
                 extracted from the real measurements that is used to
                 provide piece-wise approximation of the measurements.
                 While our basic technique exploits correlations among
                 measurements taken on a single node, we further show
                 how it can be adapted to exploit correlations among
                 multiple nodes in a localized setting. Sensor nodes may
                 form clusters and, within a cluster, a group leader
                 identifies and coalesces similar measurements taken by
                 different nodes. This localized mode of operation
                 further improves the accuracy of the approximation,
                  typically by a factor of 5 to 15. We provide detailed
                 experiments of our algorithms and make direct
                 comparisons against standard approximation techniques
                 like Wavelets, Histograms and the Discrete Cosine
                 Transform, on a variety of error metrics and for real
                 data sets from different domains.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "compression; sensor networks",
}
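
%%% Piecewise approximation under an error bound is the basic building
%%% block here; a generic piecewise-constant version conveys the flavor
%%% (the SBR algorithm itself fits segments against its extracted base
%%% signal, which this sketch does not model):
%%%
%%%     def piecewise_constant(values, eps):
%%%         """Greedy segments covered by one level within +/- eps."""
%%%         segments, start = [], 0
%%%         lo = hi = values[0]
%%%         for i in range(1, len(values)):
%%%             nlo, nhi = min(lo, values[i]), max(hi, values[i])
%%%             if nhi - nlo > 2 * eps:
%%%                 segments.append((start, (lo + hi) / 2.0))
%%%                 start, lo, hi = i, values[i], values[i]
%%%             else:
%%%                 lo, hi = nlo, nhi
%%%         segments.append((start, (lo + hi) / 2.0))
%%%         return segments  # list of (start index, level)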

@Article{Bohm:2007:FRA,
  author =       "Klemens B{\"o}hm and Erik Buchmann",
  title =        "Free riding-aware forwarding in {Content-Addressable
                 Networks}",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "4",
  pages =        "463--482",
  month =        oct,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:25 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Research on P2P data structures has tacitly assumed
                 that peers readily participate in the work, i.e., are
                 cooperative. But such participation is voluntary, and
                 free riding is the dominant strategy. This article
                 describes a protocol that renders free riding
                 unattractive, for one particular P2P data structure.
                 The protocol is based on feedback that adjacent nodes
                 exchange. This induces transitive logical networks of
                 nodes that rule out uncooperative peers. The protocol
                 uses proofs of work to deter free riding. To show that
                  cooperative behavior dominates, we develop a
                 cost model that quantifies the overall cost of peers,
                 depending on their degree of cooperativeness and many
                 other parameters. The cost model tells us that we can
                 achieve a good discrimination against peers that are
                 less cooperative, with moderate additional cost for
                 cooperative peers. Extensive experiments confirm the
                 validity of our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "distributed hashtables; free riding; incentives;
                 peer-to-peer; reputation",
}

@Article{Traina:2007:OFA,
  author =       "Caetano {Traina, Jr.} and Roberto F. Filho and Agma J.
                 Traina and Marcos R. Vieira and Christos Faloutsos",
  title =        "The {Omni-family} of all-purpose access methods: a
                 simple and effective way to make similarity search more
                 efficient",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "4",
  pages =        "483--505",
  month =        oct,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:25 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Similarity search operations require executing
                 expensive algorithms, and although broadly useful in
                 many new applications, they rely on specific structures
                 not yet supported by commercial DBMS. In this paper we
                  discuss the new Omni-technique, which makes it
                  possible to build a variety of dynamic Metric Access
                  Methods based on a number of selected objects from the
                  dataset, used as global reference objects. We call
                  them the Omni-family of metric access methods. This
                  technique enables building similarity search
                  operations on top of existing structures,
                  significantly improving their performance in terms of
                  the number of disk accesses and distance calculations.
                  Additionally, our methods scale
                 up well, exhibiting sub-linear behavior with growing
                 database size.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "index structures; metric access methods; multimedia
                 databases; similarity search",
}
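
%%% The pruning mechanism behind global reference objects is the
%%% triangle inequality: for any focus f, |d(q,f) - d(x,f)| <= d(q,x),
%%% so distances to a few foci, precomputed for every object, give a
%%% cheap lower bound on d(q,x).  A minimal sketch (not the authors'
%%% implementation):
%%%
%%%     import math
%%%
%%%     def dist(a, b):  # any metric works; Euclidean here
%%%         return math.sqrt(sum((u - v) ** 2 for u, v in zip(a, b)))
%%%
%%%     def range_query(q, r, data, foci, coords):
%%%         # coords[i][j] = dist(data[i], foci[j]), precomputed
%%%         qc = [dist(q, f) for f in foci]
%%%         hits = []
%%%         for x, xc in zip(data, coords):
%%%             lb = max(abs(a - b) for a, b in zip(qc, xc))
%%%             if lb <= r and dist(q, x) <= r:  # prune, then verify
%%%                 hits.append(x)
%%%         return hits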

@Article{Khan:2007:NID,
  author =       "Latifur Khan and Mamoun Awad and Bhavani
                 Thuraisingham",
  title =        "A new intrusion detection system using support vector
                 machines and hierarchical clustering",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "4",
  pages =        "507--521",
  month =        oct,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:25 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Whenever an intrusion occurs, the security and value
                  of a computer system are compromised. Network-based
                 attacks make it difficult for legitimate users to
                 access various network services by purposely occupying
                 or sabotaging network resources and services. This can
                 be done by sending large amounts of network traffic,
                 exploiting well-known faults in networking services,
                 and by overloading network hosts. Intrusion Detection
                 attempts to detect computer attacks by examining
                 various data records observed in processes on the
                 network and it is split into two groups, anomaly
                 detection systems and misuse detection systems. Anomaly
                 detection is an attempt to search for malicious
                 behavior that deviates from established normal
                 patterns. Misuse detection is used to identify
                 intrusions that match known attack scenarios. Our
                 interest here is in anomaly detection and our proposed
                 method is a scalable solution for detecting
                 network-based anomalies. We use Support Vector Machines
                 (SVM) for classification. The SVM is one of the most
                 successful classification algorithms in the data mining
                 area, but its long training time limits its use. This
                 paper presents a study for enhancing the training time
                 of SVM, specifically when dealing with large data sets,
                 using hierarchical clustering analysis. We use the
                 Dynamically Growing Self-Organizing Tree (DGSOT)
                 algorithm for clustering because it has proved to
                 overcome the drawbacks of traditional hierarchical
                 clustering algorithms (e.g., hierarchical agglomerative
                 clustering). Clustering analysis helps find the
                 boundary points, which are the most qualified data
                 points to train SVM, between two classes. We present a
                  new approach combining SVM and DGSOT, which
                 starts with an initial training set and expands it
                 gradually using the clustering structure produced by
                 the DGSOT algorithm. We compare our approach with the
                 Rocchio Bundling technique and random selection in
                 terms of accuracy loss and training time gain using a
                 single benchmark real data set. We show that our
                  proposed variations contribute significantly to
                 improving the training process of SVM with high
                 generalization accuracy and outperform the Rocchio
                 Bundling technique.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Dalvi:2007:EQE,
  author =       "Nilesh Dalvi and Dan Suciu",
  title =        "Efficient query evaluation on probabilistic
                 databases",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "4",
  pages =        "523--544",
  month =        oct,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:25 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We describe a framework for supporting arbitrarily
                 complex SQL queries with `uncertain' predicates. The
                 query semantics is based on a probabilistic model and
                 the results are ranked, much like in Information
                 Retrieval. Our main focus is query evaluation. We
                  describe an optimization algorithm that can efficiently
                  compute most queries. We show, however, that the
                 data complexity of some queries is \#P-complete, which
                 implies that these queries do not admit any efficient
                 evaluation methods. For these queries we describe both
                 an approximation algorithm and a Monte-Carlo simulation
                 algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
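
%%% For queries that admit efficient evaluation, probabilities can be
%%% combined with independent-AND/OR rules inside the plan; for example,
%%% the probability that at least one of n independent witness tuples
%%% exists is 1 - prod(1 - p_i).  A one-function sketch of that rule:
%%%
%%%     def prob_exists(probs):
%%%         """P(at least one independent event occurs)."""
%%%         q = 1.0
%%%         for p in probs:
%%%             q *= 1.0 - p
%%%         return 1.0 - q
%%%
%%%     print(prob_exists([0.5, 0.5]))  # 0.75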

@Article{Croft:2008:ISI,
  author =       "W. Bruce Croft and Hans-J. Schek",
  title =        "Introduction to the special issue on database and
                 information retrieval integration",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "1",
  pages =        "1--3",
  month =        jan,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:26 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Roelleke:2008:MRM,
  author =       "Thomas Roelleke and Hengzhi Wu and Jun Wang and Hany
                 Azzam",
  title =        "Modelling retrieval models in a probabilistic
                 relational algebra with a new operator: the relational
                 {Bayes}",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "1",
  pages =        "5--37",
  month =        jan,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:26 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper presents a probabilistic relational
                 modelling (implementation) of the major probabilistic
                 retrieval models. Such a high-level implementation is
                 useful since it supports the ranking of any object, it
                 allows for the reasoning across structured and
                 unstructured data, and it gives the software
                 (knowledge) engineer control over ranking and thus
                 supports customisation. The contributions of this paper
                 include the specification of probabilistic SQL (PSQL)
                 and probabilistic relational algebra (PRA), a new
                 relational operator for probability estimation (the
                 relational Bayes), the probabilistic relational
                 modelling of retrieval models, a comparison of
                 modelling retrieval with traditional SQL versus
                 modelling retrieval with PSQL, and a comparison of the
                 performance of probability estimation with traditional
                 SQL versus PSQL. The main findings are that the
                 PSQL/PRA paradigm allows for the description of
                 advanced retrieval models, is suitable for solving
                 large-scale retrieval tasks, and outperforms
                 traditional SQL in terms of abstraction and performance
                 regarding probability estimation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "DB + IR integration; probabilistic databases;
                 probabilistic relational modelling; retrieval models",
}
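
%%% The flavor of relational probability estimation can be sketched as
%%% frequency normalization per evidence key, e.g. estimating
%%% P(term | doc) from a (term, doc) relation; the relational Bayes
%%% generalizes this inside PRA (the sketch is illustrative, not the
%%% operator's definition):
%%%
%%%     from collections import Counter
%%%
%%%     def estimate(tuples, evidence):
%%%         """tuples: list of dicts; evidence: attribute conditioned on."""
%%%         totals = Counter(t[evidence] for t in tuples)
%%%         return [dict(t, prob=1.0 / totals[t[evidence]])
%%%                 for t in tuples]
%%%
%%%     rows = [{"term": "db", "doc": 1}, {"term": "ir", "doc": 1},
%%%             {"term": "db", "doc": 2}]
%%%     print(estimate(rows, "doc"))  # P(term | doc): 0.5, 0.5, 1.0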

@Article{Schmitt:2008:QDQ,
  author =       "Ingo Schmitt",
  title =        "{QQL}: {A DB\&IR Query Language}",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "1",
  pages =        "39--56",
  month =        jan,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:26 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Traditional database query languages are based on set
                 theory and crisp first order logic. However, many
                 applications require retrieval-like queries which
                 return result objects associated with a degree of being
                 relevant to the query. Historically, retrieval systems
                 estimate relevance by exploiting hidden object
                 semantics whereas query processing in database systems
                 relies on matching select-conditions with attribute
                 values. Thus, different mechanisms were developed for
                 database and information retrieval systems. In
                 consequence, there is a lack of support for queries
                 involving both retrieval and database search terms. In
                 this work, we introduce the quantum query language
                 (QQL). Its underlying unifying theory is based on the
                 mathematical formalism of quantum mechanics and quantum
                 logic. Van Rijsbergen already discussed the strong
                 relation between the formalism of quantum mechanics and
                 information retrieval. In this work, we interrelate
                 concepts from database query processing to concepts
                  from quantum mechanics and logic. As a result, we
                  obtain a common theory that allows us to seamlessly
                  incorporate retrieval search into traditional database
                 query processing.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database query language; DB \& IR; information
                 retrieval",
}

@Article{Lau:2008:MRM,
  author =       "Ho Lam Lau and Wilfred Ng",
  title =        "A multi-ranker model for adaptive {XML} searching",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "1",
  pages =        "57--80",
  month =        jan,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:26 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The evolution of computing technology suggests that it
                 has become more feasible to offer access to Web
                 information in a ubiquitous way, through various kinds
                 of interaction devices such as PCs, laptops, palmtops,
                 and so on. As XML has become a de-facto standard for
                 exchanging Web data, an interesting and practical
                 research problem is the development of models and
                 techniques to satisfy various needs and preferences in
                 searching XML data. In this paper, we employ a list of
                 simple XML tagged keywords as a vehicle for searching
                 XML fragments in a collection of XML documents. In
                 order to deal with the diversified nature of XML
                 documents as well as user preferences, we propose a
                 novel multi-ranker model (MRM), which is able to
                 abstract a spectrum of important XML properties and
                 adapt the features to different XML search needs. The
                 MRM is composed of three ranking levels. The lowest
                 level consists of two categories of similarity and
                 granularity features. At the intermediate level, we
                 define four tailored XML rankers (XRs), which consist
                 of different lower level features and have different
                 strengths in searching XML fragments. The XRs are
                 trained via a learning mechanism called the Ranking
                 Support Vector Machine in a voting Spy Na{\"\i}ve Bayes
                 framework (RSSF). The RSSF takes as input a set of
                 labeled fragments and feature vectors and generates as
                 output Adaptive Rankers (ARs) in the learning process.
                 The ARs are defined over the XRs and generated at the
                 top level of the MRM. We show empirically that the RSSF
                 is able to improve the MRM significantly in the
                 learning process that needs only a small set of
                 training XML fragments. We demonstrate that the trained
                 MRM is able to bring out the strengths of the XRs in
                  order to adapt to different preferences and queries.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Theobald:2008:TEV,
  author =       "Martin Theobald and Holger Bast and Debapriyo Majumdar
                 and Ralf Schenkel and Gerhard Weikum",
  title =        "{TopX}: efficient and versatile top-$k$ query
                 processing for semistructured data",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "1",
  pages =        "81--115",
  month =        jan,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:26 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Recent IR extensions to XML query languages such as
                  XPath 1.0 Full-Text or the NEXI query language of the
                 INEX benchmark series reflect the emerging interest in
                 IR-style ranked retrieval over semistructured data.
                 TopX is a top-$k$ retrieval engine for text and
                 semistructured data. It terminates query execution as
                 soon as it can safely determine the $k$ top-ranked
                 result elements according to a monotonic score
                 aggregation function with respect to a multidimensional
                 query. It efficiently supports vague search on both
                 content- and structure-oriented query conditions for
                 dynamic query relaxation with controllable influence on
                 the result ranking. The main contributions of this
                 paper unfold into four main points: (1) fully
                 implemented models and algorithms for ranked XML
                 retrieval with XPath Full-Text functionality, (2)
                 efficient and effective top-$k$ query processing for
                 semistructured data, (3) support for integrating
                 thesauri and ontologies with statistically quantified
                 relationships among concepts, leveraged for word-sense
                 disambiguation and query expansion, and (4) a
                 comprehensive description of the TopX system, with
                 performance experiments on large-scale corpora like
                 TREC TeraByte and INEX Wikipedia.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "content- and structure-aware ranking; cost-based index
                 access scheduling; DB{\&} dynamic query expansion;
                 efficient XML full-text search; IR integration;
                 probabilistic candidate pruning; top-$k$ query
                 processing",
}
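
%%% Early termination with a monotonic score aggregation follows the
%%% classical threshold pattern: scan the per-dimension score lists in
%%% descending order and stop once k candidates score at least the
%%% aggregate of the last scores seen per list.  A compact sketch with
%%% sum aggregation and random access (illustrative; TopX's
%%% probabilistic pruning and index access scheduling go well beyond
%%% this):
%%%
%%%     def top_k(lists, k):
%%%         """lists: per-dimension [(item, score)], sorted descending."""
%%%         maps = [dict(l) for l in lists]  # random access per dimension
%%%         scores, depth = {}, 0
%%%         while depth < max(len(l) for l in lists):
%%%             for l in lists:
%%%                 if depth < len(l) and l[depth][0] not in scores:
%%%                     item = l[depth][0]
%%%                     scores[item] = sum(m.get(item, 0.0) for m in maps)
%%%             threshold = sum(l[min(depth, len(l) - 1)][1]
%%%                             for l in lists)
%%%             best = sorted(scores.values(), reverse=True)[:k]
%%%             if len(best) == k and best[-1] >= threshold:
%%%                 break
%%%             depth += 1
%%%         return sorted(scores.items(), key=lambda kv: -kv[1])[:k]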

@Article{Simitsis:2008:PUK,
  author =       "Alkis Simitsis and Georgia Koutrika and Yannis
                 Ioannidis",
  title =        "Pr{\'e}cis: from unstructured keywords as queries to
                 structured databases as answers",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "1",
  pages =        "117--149",
  month =        jan,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:26 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Pr{\'e}cis queries represent a novel way of accessing
                 data, which combines ideas and techniques from the
                 fields of databases and information retrieval. They are
                  free-form, keyword-based queries on top of relational
                 databases that generate entire multi-relation
                 databases, which are logical subsets of the original
                 ones. A logical subset contains not only items directly
                 related to the given query keywords but also items
                 implicitly related to them in various ways, with the
                 purpose of providing to the user much greater insight
                 into the original data. In this paper, we lay the
                 foundations for the concept of logical database subsets
                 that are generated from pr{\'e}cis queries under a
                 generalized perspective that removes several
                 restrictions of previous work. In particular, we extend
                 the semantics of pr{\'e}cis queries considering that
                 they may contain multiple terms combined through the
                 AND, OR, and NOT operators. On the basis of these
                 extended semantics, we define the concept of a logical
                 database subset, we identify the one that is most
                 relevant to a given query, and we provide algorithms
                 for its generation. Finally, we present an extensive
                 set of experimental results that demonstrate the
                 efficiency and benefits of our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "free-from queries; keyword search; query processing",
}

@Article{Cornacchia:2008:FEI,
  author =       "Roberto Cornacchia and S{\'a}ndor H{\'e}man and Marcin
                  Zukowski and Arjen P. de Vries and Peter Boncz",
  title =        "Flexible and efficient {IR} using array databases",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "1",
  pages =        "151--168",
  month =        jan,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:26 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The Matrix Framework is a recent proposal by
                 Information Retrieval (IR) researchers to flexibly
                 represent information retrieval models and concepts in
                 a single multi-dimensional array framework. We provide
                 computational support for exactly this framework with
                 the array database system SRAM (Sparse Relational Array
                 Mapping), that works on top of a DBMS. Information
                 retrieval models can be specified in its
                 comprehension-based array query language, in a way that
                 directly corresponds to the underlying mathematical
                 formulas. SRAM efficiently stores sparse arrays in
                 (compressed) relational tables and translates and
                 optimizes array queries into relational queries. In
                 this work, we describe a number of array query
                 optimization rules. To demonstrate their effect on text
                 retrieval, we apply them in the TREC TeraByte track
                 (TREC-TB) efficiency task, using the Okapi BM25 model
                 as our example. It turns out that these optimization
                 rules enable SRAM to automatically translate the BM25
                 array queries into the relational equivalent of
                 inverted list processing including compression, score
                 materialization and quantization, such as employed by
                 custom-built IR systems. The use of the
                  high-performance MonetDB/X100 relational backend, which
                 provides transparent database compression, allows the
                 system to achieve very fast response times with good
                 precision and low resource usage.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "array databases; database compression; information
                 retrieval; query optimization",
}

@Article{Lockemann:2008:MKR,
  author =       "Peter C. Lockemann",
  title =        "In memoriam {Klaus R. Dittrich} (1950---2007)",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "169--170",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Alonso:2008:GEM,
  author =       "Gustavo Alonso and David Lomet and Umesh Dayal",
  title =        "Guest {Editors}' message",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "171--172",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gemulla:2008:MBS,
  author =       "Rainer Gemulla and Wolfgang Lehner and Peter J.
                 Haas",
  title =        "Maintaining bounded-size sample synopses of evolving
                 datasets",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "173--201",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Perhaps the most flexible synopsis of a database is a
                 uniform random sample of the data; such samples are
                 widely used to speed up processing of analytic queries
                 and data-mining tasks, enhance query optimization, and
                 facilitate information integration. The ability to
                 bound the maximum size of a sample can be very
                 convenient from a system-design point of view, because
                 the task of memory management is simplified, especially
                 when many samples are maintained simultaneously. In
                 this paper, we study methods for incrementally
                 maintaining a bounded-size uniform random sample of the
                 items in a dataset in the presence of an arbitrary
                 sequence of insertions and deletions. For `stable'
                 datasets whose size remains roughly constant over time,
                 we provide a novel sampling scheme, called `random
                 pairing' (RP), that maintains a bounded-size uniform
                 sample by using newly inserted data items to compensate
                 for previous deletions. The RP algorithm is the first
                 extension of the 45-year-old reservoir sampling
                 algorithm to handle deletions; RP reduces to the
                 `passive' algorithm of Babcock et al. when the
                 insertions and deletions correspond to a moving window
                 over a data stream. Experiments show that, when
                 dataset-size fluctuations over time are not too
                 extreme, RP is the algorithm of choice with respect to
                 speed and sample-size stability. For `growing'
                 datasets, we consider algorithms for periodically
                 resizing a bounded-size random sample upwards. We prove
                 that any such algorithm cannot avoid accessing the base
                 data, and provide a novel resizing algorithm that
                 minimizes the time needed to increase the sample size.
                 We also show how to merge uniform samples from disjoint
                 datasets to obtain a uniform sample of the union of the
                 datasets; the merged sample can be incrementally
                 maintained. Our new RPMerge algorithm extends the
                 HRMerge algorithm of Brown and Haas to effectively deal
                 with deletions, thereby facilitating efficient parallel
                 sampling.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database sampling; reservoir sampling; sample
                 maintenance; synopsis",
}
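
%%% A compact Python sketch of the random-pairing idea described in
%%% the abstract above (new insertions compensate prior deletions);
%%% an illustration under our own naming, not the authors' code:
%%%
%%%     import random
%%%
%%%     class RPSample:
%%%         def __init__(self, bound):
%%%             self.bound, self.sample, self.n = bound, [], 0
%%%             self.c1 = 0  # uncompensated deletions that hit the sample
%%%             self.c2 = 0  # uncompensated deletions that missed it
%%%
%%%         def delete(self, item):
%%%             self.n -= 1
%%%             if item in self.sample:
%%%                 self.sample.remove(item)
%%%                 self.c1 += 1
%%%             else:
%%%                 self.c2 += 1
%%%
%%%         def insert(self, item):
%%%             self.n += 1
%%%             d = self.c1 + self.c2
%%%             if d == 0:                      # classical reservoir step
%%%                 if len(self.sample) < self.bound:
%%%                     self.sample.append(item)
%%%                 elif random.random() < self.bound / self.n:
%%%                     self.sample[random.randrange(self.bound)] = item
%%%             elif random.random() < self.c1 / d:
%%%                 self.sample.append(item)    # pair with a sample deletion
%%%                 self.c1 -= 1
%%%             else:
%%%                 self.c2 -= 1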

@Article{Yu:2008:XSR,
  author =       "Cong Yu and H. V. Jagadish",
  title =        "{XML} schema refinement through redundancy detection
                 and normalization",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "203--223",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "As XML becomes increasingly popular, XML schema design
                 has become an increasingly important issue. One of the
                 central objectives of good schema design is to avoid
                 data redundancies: redundantly stored information can
                 lead not just only to a higher data storage cost but
                 also to increased costs for data transfer and data
                 manipulation. Furthermore, such data redundancies can
                 lead to potential update anomalies, rendering the
                 database inconsistent. One strategy to avoid data
                 redundancies is to design redundancy-free schema from
                 the start on the basis of known functional
                 dependencies. We observe that XML databases are often
                 `casually designed' and XML FDs may not be determined
                 in advance. Under such circumstances, discovering XML
                 data redundancies from the data itself becomes
                 necessary and is an integral part of the schema
                 refinement (or re-design) process. We present the
                 design and implementation of the first system,
                 DiscoverXFD, for efficient discovery of XML data
                 redundancies. It employs a novel XML data structure and
                 introduces a new class of partition-based algorithms.
                 The XML data redundancies are defined on the basis of a
                 new notion of XML functional dependency (XML FD) that
                 (1) extends previous notions by incorporating set
                 elements into the XML FD specification, and (2)
                 maintains tuple-based semantics through the novel
                 concept of Generalized Tree Tuple (GTT). Using this
                 comprehensive XML FD notion, we introduce a new normal
                 form (GTT-XNF) for XML documents, and provide
                 comprehensive comparisons with previous studies. Given
                 the set of data redundancies (in the form of
                 redundancy-indicating XML FDs) discovered by
                 DiscoverXFD, we describe a normalization algorithm for
                 converting any original XML schema into one in
                 GTT-XNF.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data redundancy; functional dependency; normal form;
                 schema design; XML",
}
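
%%% DiscoverXFD uses partition-based algorithms; the flat relational
%%% analogue of its core test is easy to state in Python (a sketch
%%% for intuition only, not the paper's tree-tuple machinery):
%%%
%%%     def fd_holds(rows, lhs, rhs):
%%%         # X -> Y holds iff every group of rows agreeing on X
%%%         # also agrees on Y.
%%%         groups = {}
%%%         for row in rows:
%%%             key = tuple(row[a] for a in lhs)
%%%             val = tuple(row[a] for a in rhs)
%%%             if groups.setdefault(key, val) != val:
%%%                 return False
%%%         return True
%%%
%%%     # fd_holds([{'a': 1, 'b': 2}, {'a': 1, 'b': 3}], ['a'], ['b'])
%%%     # -> False: two rows agree on X = a but differ on Y = b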

@Article{Mitra:2008:TKS,
  author =       "Soumyadeb Mitra and Marianne Winslett and Windsor W.
                 Hsu and Kevin Chen-Chuan Chang",
  title =        "Trustworthy keyword search for compliance storage",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "225--242",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Intense regulatory focus on secure retention of
                 electronic records has led to a need to ensure that
                 records are trustworthy, i.e., able to provide
                 irrefutable proof and accurate details of past events.
                 In this paper, we analyze the requirements for a
                 trustworthy index to support keyword-based search
                 queries. We argue that trustworthy index entries must
                 be durable--the index must be updated when new
                 documents arrive, and not periodically deleted and
                 rebuilt. To this end, we propose a scheme for
                 efficiently updating an inverted index, based on
                 judicious merging of the posting lists of terms.
                 Through extensive simulations and experiments with two
                 real world data sets and workloads, we demonstrate that
                 the scheme achieves online update speed while
                 maintaining good query performance. We also present and
                 evaluate jump indexes, a novel trustworthy and
                 efficient index for join operations on posting lists
                 for multi-keyword queries. Jump indexes support insert,
                 lookup and range queries in time logarithmic in the
                 number of indexed documents.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "compliance storage; inverted index; jump index",
}
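
%%% The join operations that jump indexes accelerate work over sorted
%%% posting lists; the textbook merge-based intersection they improve
%%% upon looks like this in Python (a baseline sketch, not the
%%% jump-index structure itself):
%%%
%%%     def intersect(p1, p2):
%%%         # p1, p2: ascending lists of document ids
%%%         i = j = 0
%%%         out = []
%%%         while i < len(p1) and j < len(p2):
%%%             if p1[i] == p2[j]:
%%%                 out.append(p1[i]); i += 1; j += 1
%%%             elif p1[i] < p2[j]:
%%%                 i += 1
%%%             else:
%%%                 j += 1
%%%         return out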

@Article{Benjelloun:2008:DUL,
  author =       "Omar Benjelloun and Anish Das Sarma and Alon Halevy
                 and Martin Theobald and Jennifer Widom",
  title =        "Databases with uncertainty and lineage",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "243--264",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper introduces uldbs, an extension of
                 relational databases with simple yet expressive
                 constructs for representing and manipulating both
                 lineage and uncertainty. Uncertain data and data
                 lineage are two important areas of data management that
                  have been considered extensively in isolation;
                  however, many applications require the features in
                  tandem.
                 Fundamentally, lineage enables simple and consistent
                 representation of uncertain data, it correlates
                 uncertainty in query results with uncertainty in the
                 input data, and query processing with lineage and
                 uncertainty together presents computational benefits
                  over treating them separately. We show that the ULDB
                 representation is complete, and that it permits
                 straightforward implementation of many relational
                  operations. We define two notions of ULDB
                  minimality--data-minimal and lineage-minimal--and study
                  minimization of ULDB representations under both
                 notions. With lineage, derived relations are no longer
                 self-contained: their uncertainty depends on
                 uncertainty in the base data. We provide an algorithm
                 for the new operation of extracting a database subset
                 in the presence of interconnected uncertainty. We also
                  show how ULDBs enable a new approach to query
                 processing in probabilistic databases. Finally, we
                 describe the current state of the Trio system, our
                  implementation of ULDBs under development at
                 Stanford.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "lineage; probabilistic data management; provenance;
                 uncertainty in databases",
}

@Article{Jeffery:2008:ARM,
  author =       "Shawn R. Jeffery and Michael J. Franklin and Minos
                 Garofalakis",
  title =        "An adaptive {RFID} middleware for supporting
                 metaphysical data independence",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "265--289",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Sensor devices produce data that are unreliable,
                  low-level, and seldom directly usable by
                 applications. In this paper, we propose metaphysical
                 data independence (MDI), a layer of independence that
                 shields applications from the challenges that arise
                 when interacting directly with sensor devices. The key
                 philosophy behind MDI is that applications do not deal
                 with any aspect of physical device data, but rather
                 interface with a high-level reconstruction of the
                 physical world created by a sensor infrastructure. As a
                 concrete instantiation of MDI in such a sensor
                 infrastructure, we detail MDI-SMURF, a Radio Frequency
                 Identification (RFID) middleware system that alleviates
                 issues associated with using RFID data through adaptive
                 techniques based on a novel statistical framework.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "data cleaning; RFID technology; sensor-based
                 applications; statistical sampling",
}
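
%%% A minimal smoothing-window filter conveys the flavor of RFID data
%%% cleaning: report a tag as present if it was read at least once in
%%% the last few epochs.  MDI-SMURF adapts the window size per tag
%%% with a statistical framework; that logic is omitted in this
%%% fixed-window Python sketch (our own illustration):
%%%
%%%     def smooth(epochs, window):
%%%         # epochs: iterable of per-epoch sets of observed tag ids
%%%         last_seen = {}
%%%         for t, tags in enumerate(epochs):
%%%             for tag in tags:
%%%                 last_seen[tag] = t
%%%             yield {tag for tag, ts in last_seen.items()
%%%                    if t - ts < window}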

@Article{Parreira:2008:JAP,
  author =       "Josiane Xavier Parreira and Carlos Castillo and Debora
                 Donato and Sebastian Michel and Gerhard Weikum",
  title =        "The {Juxtaposed} approximate {PageRank} method for
                 robust {PageRank} approximation in a peer-to-peer web
                 search network",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "291--313",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0057-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat May 8 18:33:08 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We present Juxtaposed approximate PageRank (JXP), a
                 distributed algorithm for computing PageRank-style
                 authority scores of Web pages on a peer-to-peer (P2P)
                 network. Unlike previous algorithms, JXP allows peers
                 to have overlapping content and requires no a priori
                 knowledge of other peers' content. Our algorithm
                 combines locally computed authority scores with
                 information obtained from other peers by means of
                 random meetings among the peers in the network. This
                 computation is based on a Markov-chain state-lumping
                 technique, and iteratively approximates global
                 authority scores. The algorithm scales with the number
                 of peers in the network and we show that the JXP scores
                 converge to the true PageRank scores that one would
                 obtain with a centralized algorithm. Finally, we show
                 how to deal with misbehaving peers by extending JXP
                 with a reputation model.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "link analysis; Markov chain aggregation; peer-to-peer
                 systems; social reputation; Web graph",
}
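
%%% For reference, the centralized computation that JXP approximates
%%% in a distributed fashion is ordinary power-iteration PageRank; a
%%% small Python sketch (the baseline only, not the JXP algorithm):
%%%
%%%     def pagerank(links, d=0.85, iters=50):
%%%         # links: page -> list of pages it links to
%%%         pages = list(links)
%%%         n = len(pages)
%%%         rank = {p: 1.0 / n for p in pages}
%%%         for _ in range(iters):
%%%             nxt = {p: (1.0 - d) / n for p in pages}
%%%             for p, outs in links.items():
%%%                 targets = outs if outs else pages  # dangling: spread
%%%                 share = d * rank[p] / len(targets)
%%%                 for q in targets:
%%%                     nxt[q] += share
%%%             rank = nxt
%%%         return rank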

@Article{Narayanan:2008:DAQ,
  author =       "Dushyanth Narayanan and Austin Donnelly and Richard
                 Mortier and Antony Rowstron",
  title =        "Delay aware querying with {Seaweed}",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "315--331",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Large highly distributed data sets are poorly
                 supported by current query technologies. Applications
                 such as endsystem-based network management are
                 characterized by data stored on large numbers of
                 endsystems, with frequent local updates and relatively
                 infrequent global one-shot queries. The challenges are
                 scale ($10^3$ to $10^9$ endsystems) and endsystem
                 unavailability. In such large systems, a significant
                 fraction of endsystems and their data will be
                 unavailable at any given time. Existing methods to
                 provide high data availability despite endsystem
                 unavailability involve centralizing, redistributing or
                 replicating the data. At large scale these methods are
                 not scalable. We advocate a design that trades query
                 delay for completeness, incrementally returning results
                 as endsystems become available. We also introduce the
                 idea of completeness prediction, which provides the
                 user with explicit feedback about this
                 delay/completeness trade-off. Completeness prediction
                 is based on replication of compact data summaries and
                 availability models. This metadata is orders of
                 magnitude smaller than the data. Seaweed is a scalable
                 query infrastructure supporting incremental results,
                 online in-network aggregation and completeness
                 prediction. It is built on a distributed hash table
                 (DHT) but unlike previous DHT based approaches it does
                 not redistribute data across the network. It exploits
                 the DHT infrastructure for failure-resilient metadata
                 replication, query dissemination, and result
                 aggregation. We analytically compare Seaweed's
                 scalability against other approaches and also evaluate
                 the Seaweed prototype running on a large-scale network
                 simulator driven by real-world traces.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bernstein:2008:IMC,
  author =       "Philip A. Bernstein and Todd J. Green and Sergey
                 Melnik and Alan Nash",
  title =        "Implementing mapping composition",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "2",
  pages =        "333--353",
  month =        mar,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:27 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Mapping composition is a fundamental operation in
                  metadata-driven applications. Given a mapping over
                  schemas $\sigma_1$ and $\sigma_2$ and a mapping
                  over schemas $\sigma_2$ and $\sigma_3$, the
                  composition problem is to compute an equivalent mapping
                  over $\sigma_1$ and $\sigma_3$. We describe a new
                  composition algorithm that targets practical
                  applications. It incorporates view unfolding. It
                  eliminates as many $\sigma_2$ symbols as possible,
                 even if not all can be eliminated. It covers
                 constraints expressed using arbitrary monotone
                 relational operators and, to a lesser extent,
                 non-monotone operators. And it introduces the new
                 technique of left composition. We describe our
                 implementation, explain how to extend it to support
                 user-defined operators, and present experimental
                 results which validate its effectiveness.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "mapping composition; model management; schema
                 mappings",
}
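
%%% View unfolding, one ingredient of the composition algorithm,
%%% amounts to substituting one mapping's view definitions into the
%%% other's.  With mappings modeled as functions from instances to
%%% instances, the idea reduces to a tiny Python sketch (our own
%%% formulation, not the paper's constraint-based algorithm):
%%%
%%%     def compose(m12, m23):
%%%         # express sigma3 relations directly over a sigma1 instance
%%%         return lambda db1: m23(m12(db1))
%%%
%%%     m12 = lambda db: {'R2': {(a, c) for (a, b, c) in db['R1']}}
%%%     m23 = lambda db: {'R3': {(c, a) for (a, c) in db['R2']}}
%%%     m13 = compose(m12, m23)
%%%     # m13({'R1': {(1, 'x', 2)}}) == {'R3': {(2, 1)}}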

@Article{Li:2008:ESF,
  author =       "Yunyao Li and Cong Yu and H. V. Jagadish",
  title =        "Enabling {Schema-Free XQuery} with meaningful query
                 focus",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "355--377",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The widespread adoption of XML holds the promise that
                 document structure can be exploited to specify precise
                 database queries. However, users may have only a
                 limited knowledge of the XML structure, and may be
                 unable to produce a correct XQuery expression,
                 especially in the context of a heterogeneous
                 information collection. The default is to use
                 keyword-based search and we are all too familiar with
                 how difficult it is to obtain precise answers by these
                 means. We seek to address these problems by introducing
                 the notion of Meaningful Query Focus (MQF) for finding
                 related nodes within an XML document. MQF enables users
                 to take full advantage of the preciseness and
                 efficiency of XQuery without requiring (perfect)
                 knowledge of the document structure. Such a Schema-Free
                 XQuery is potentially of value not just to casual users
                 with partial knowledge of schema, but also to experts
                 working in data integration or data evolution. In such
                 a context, a schema-free query, once written, can be
                 applied universally to multiple data sources that
                 supply similar content under different schemas, and
                 applied `forever' as these schemas evolve. Our
                 experimental evaluation found that it is possible to
                 express a wide variety of queries in a schema-free
                 manner and efficiently retrieve correct results over a
                 broad diversity of schemas. Furthermore, the evaluation
                 of a schema-free query is not expensive: using a novel
                 stack-based algorithm we developed for computing MQF,
                 the overhead is from 1 to 4 times the execution time of
                 an equivalent schema-aware query. The evaluation cost
                 of schema-free queries can be further reduced by as
                 much as 68\% using a selectivity-based algorithm we
                 develop to enable the integration of MQF operation into
                 the query pipeline.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "hierarchical; query language; schema; semi-structured;
                 XML; XQuery",
}
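
%%% Relating keyword-matched nodes in MQF-style processing rests on
%%% ancestor computations over node labels; with Dewey-style labels
%%% the lowest common ancestor is a shared-prefix computation (a
%%% generic Python sketch, not the paper's stack-based MQF algorithm):
%%%
%%%     def lca(label_a, label_b):
%%%         # labels are tuples such as (1, 3, 2) for node /1/3/2
%%%         i = 0
%%%         while (i < min(len(label_a), len(label_b))
%%%                and label_a[i] == label_b[i]):
%%%             i += 1
%%%         return label_a[:i]
%%%
%%%     # lca((1, 3, 2), (1, 3, 5)) == (1, 3)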

@Article{Yiu:2008:BTI,
  author =       "Man Lung Yiu and Yufei Tao and Nikos Mamoulis",
  title =        "The {Bdual-Tree}: indexing moving objects by space
                 filling curves in the dual space",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "379--400",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Existing spatiotemporal indexes suffer from either
                 large update cost or poor query performance, except for
                  the $B^x$-tree (the state-of-the-art), which consists
                  of multiple $B^+$-trees indexing the 1D values
                  transformed from the (multi-dimensional) moving objects
                  based on a space filling curve (Hilbert, in
                  particular). This curve, however, does not consider
                  object velocities, and as a result, query processing
                  with a $B^x$-tree retrieves a large number of false
                  hits, which seriously compromises its efficiency. It is
                  natural to wonder `can we obtain better performance by
                  also capturing the velocity information, using a
                  Hilbert curve of a higher dimensionality?'. This paper
                  provides a positive answer by developing the
                  $B^{dual}$-tree, a novel spatiotemporal access method
                  leveraging pure relational methodology. We show, with
                  theoretical evidence, that the $B^{dual}$-tree indeed
                  outperforms the $B^x$-tree in most circumstances.
                  Furthermore, our technique can effectively answer
                  progressive spatiotemporal queries, which are poorly
                  supported by $B^x$-trees.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access method; space filling curve; spatiotemporal",
}
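
%%% The key step is mapping each object's position *and* velocity to
%%% a single 1D value.  The paper uses a Hilbert curve; plain bit
%%% interleaving (Z-order) shows the same dual-space idea more simply
%%% (a Python sketch assuming integer-grid coordinates):
%%%
%%%     def zorder(coords, bits=8):
%%%         # coords: e.g. (x, y, vx, vy), non-negative ints < 2**bits
%%%         key = 0
%%%         for b in range(bits - 1, -1, -1):
%%%             for c in coords:
%%%                 key = (key << 1) | ((c >> b) & 1)
%%%         return key
%%%
%%%     # zorder((3, 5, 1, 0)) yields one B+-tree-indexable key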

@Article{Awad:2008:PWS,
  author =       "Mamoun Awad and Latifur Khan and Bhavani
                 Thuraisingham",
  title =        "Predicting {WWW} surfing using multiple evidence
                 combination",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "401--417",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The improvement of many applications such as web
                 search, latency reduction, and personalization/
                 recommendation systems depends on surfing prediction.
                 Predicting user surfing paths involves tradeoffs
                 between model complexity and predictive accuracy. In
                 this paper, we combine two classification techniques,
                 namely, the Markov model and Support Vector Machines
                 (SVM), to resolve prediction using Dempster's rule.
                  Such fusion overcomes the inability of the Markov model
                  to predict unseen data, as well as the problem of
                  multiclassification in the case of SVM, especially
                  when dealing with a large number of classes.
                 We apply feature extraction to increase the power of
                 discrimination of SVM. In addition, during prediction
                 we employ domain knowledge to reduce the number of
                 classifiers for the improvement of accuracy and the
                 reduction of prediction time. We demonstrate the
                 effectiveness of our hybrid approach by comparing our
                 results with widely used techniques, namely, SVM, the
                 Markov model, and association rule mining.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
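
%%% Dempster's rule combines two basic belief assignments by
%%% multiplying the masses of intersecting focal sets and normalizing
%%% away the conflict.  A direct Python rendering of the general rule
%%% (not the paper's specific feature pipeline):
%%%
%%%     from itertools import product
%%%
%%%     def dempster(m1, m2):
%%%         # m1, m2: dicts mapping frozenset focal elements to masses;
%%%         # assumes the two sources are not in total conflict
%%%         combined, conflict = {}, 0.0
%%%         for (a, w1), (b, w2) in product(m1.items(), m2.items()):
%%%             inter = a & b
%%%             if inter:
%%%                 combined[inter] = combined.get(inter, 0.0) + w1 * w2
%%%             else:
%%%                 conflict += w1 * w2
%%%         return {s: w / (1.0 - conflict)
%%%                 for s, w in combined.items()}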

@Article{Wang:2008:HBM,
  author =       "Hai Wang and Kenneth C. Sevcik",
  title =        "Histograms based on the minimum description length
                 principle",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "419--442",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Histograms have been widely used for selectivity
                 estimation in query optimization, as well as for fast
                 approximate query answering in many OLAP, data mining,
                 and data visualization applications. This paper
                 presents a new family of histograms, the Hierarchical
                 Model Fitting (HMF) histograms, based on the Minimum
                 Description Length principle. Rather than having each
                 bucket of a histogram described by the same type of
                 model, the HMF histograms employ a local optimal model
                 for each bucket. The improved effectiveness of the
                 locally chosen models offsets more than the overhead of
                 keeping track of the representation of each individual
                 bucket. Through a set of experiments, we show that the
                 HMF histograms are capable of providing more accurate
                 approximations than previously proposed techniques for
                 many real and synthetic data sets across a variety of
                 query workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "approximate query answering; data summarization;
                 histograms; query processing",
}
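
%%% The essence of fitting a local model per bucket under the MDL
%%% principle: charge bits for the model parameters plus bits for the
%%% residuals, and keep whichever model is cheaper.  A toy Python
%%% scorer (our own simplification; the HMF histograms use a richer
%%% model hierarchy):
%%%
%%%     import math
%%%
%%%     def mdl_cost(freqs, model):
%%%         n = len(freqs)
%%%         if model == 'uniform':
%%%             params, fit = 1, [sum(freqs) / n] * n
%%%         else:                       # 'linear'
%%%             slope = (freqs[-1] - freqs[0]) / max(n - 1, 1)
%%%             params, fit = 2, [freqs[0] + slope * i for i in range(n)]
%%%         resid = sum(math.log2(1 + abs(f - g))
%%%                     for f, g in zip(freqs, fit))
%%%         return 32 * params + resid  # 32 bits per stored parameter
%%%
%%%     def best_model(freqs):
%%%         return min(('uniform', 'linear'),
%%%                    key=lambda m: mdl_cost(freqs, m))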

@Article{Deligiannakis:2008:BCQ,
  author =       "Antonios Deligiannakis and Yannis Kotidis and Nick
                 Roussopoulos",
  title =        "Bandwidth-constrained queries in sensor networks",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "443--467",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Sensor networks consist of battery-powered wireless
                 devices that are required to operate unattended for
                 long periods of time. Thus, reducing energy drain is of
                 utmost importance when designing algorithms and
                 applications for such networks. Aggregate queries are
                 often used by monitoring applications to assess the
                 status of the network and detect abnormal behavior.
                 Since radio transmission often constitutes the biggest
                 factor of energy drain in a node, in this paper we
                 propose novel algorithms for the evaluation of
                 bandwidth-constrained queries over sensor networks. The
                 goal of our techniques is, given a target bandwidth
                 utilization factor, to program the sensor nodes in a
                 way that seeks to maximize the accuracy of the produced
                 query results at the monitoring node, while always
                 providing strong error guarantees to the monitoring
                  application. This distinguishes our framework from
                  previous techniques, which only provide
                 probabilistic guarantees on the accuracy of the query
                 result. Our algorithms are equally applicable when the
                 nodes have ample power resources, but bandwidth
                 consumption needs to be minimized, for instance in
                 densely distributed networks, to ensure proper
                 operation of the nodes. Our experiments with real
                 sensor data show that bandwidth-constrained queries can
                 substantially reduce the number of messages in the
                 network while providing very tight error bounds on the
                 query result.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "in-network aggregation; sensor networks",
}
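
%%% The deterministic error guarantee can be pictured with a simple
%%% per-node filter: a sensor stays silent while its reading remains
%%% within a delta-wide band around the last reported value, so the
%%% sink's cached copy is always within delta.  (A generic Python
%%% sketch of this class of filters, not the paper's
%%% bandwidth-allocation algorithm.)
%%%
%%%     class BandFilter:
%%%         def __init__(self, delta):
%%%             self.delta, self.last = delta, None
%%%
%%%         def read(self, value):
%%%             # return the value to transmit, or None to suppress
%%%             if self.last is None or abs(value - self.last) > self.delta:
%%%                 self.last = value
%%%                 return value
%%%             return None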

@Article{Hammad:2008:QPM,
  author =       "Moustafa A. Hammad and Walid G. Aref and Ahmed K.
                 Elmagarmid",
  title =        "Query processing of multi-way stream window joins",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "469--488",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper introduces a class of join algorithms,
                 termed W-join, for joining multiple infinite data
                 streams. W-join addresses the infinite nature of the
                 data streams by joining stream data items that lie
                 within a sliding window and that match a certain join
                 condition. In addition to its general applicability in
                 stream query processing, W-join can be used to track
                 the motion of a moving object or detect the propagation
                 of clouds of hazardous material or pollution spills
                 over time in a sensor network environment. We describe
                 two new algorithms for W-join and address variations
                 and local/global optimizations related to specifying
                 the nature of the window constraints to fulfill the
                 posed queries. The performance of the proposed
                 algorithms is studied experimentally in a prototype
                 stream database system, using synthetic data streams
                 and real time-series data. Tradeoffs of the proposed
                 algorithms and their advantages and disadvantages are
                 highlighted, given variations in the aggregate arrival
                 rates of the input data streams and the desired
                 response times per query.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "multi-way window join; stream query processing",
}
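
%%% The core of a sliding-window join: evict expired tuples, probe
%%% the other window, then insert.  A two-stream Python sketch
%%% (W-join generalizes this to many streams with more refined
%%% window constraints):
%%%
%%%     from collections import deque
%%%
%%%     def window_join(arrivals, window, match):
%%%         # arrivals: iterable of (timestamp, stream_id in {0,1}, value)
%%%         state = {0: deque(), 1: deque()}
%%%         for ts, sid, val in arrivals:
%%%             other = state[1 - sid]
%%%             while other and other[0][0] < ts - window:
%%%                 other.popleft()             # expired tuple
%%%             for ots, oval in other:
%%%                 if match(val, oval):
%%%                     yield (oval, val) if sid else (val, oval)
%%%             state[sid].append((ts, val))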

@Article{Luo:2008:FBP,
  author =       "Qiong Luo and Jeffrey F. Naughton and Wenwei Xue",
  title =        "Form-based proxy caching for database-backed web
                 sites: keywords and functions",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "489--513",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Web caching proxy servers are essential for improving
                 web performance and scalability, and recent research
                 has focused on making proxy caching work for
                 database-backed web sites. In this paper, we explore a
                 new proxy caching framework that exploits the query
                 semantics of HTML forms. We identify two common classes
                 of form-based queries from real-world database-backed
                 web sites, namely, keyword-based queries and
                 function-embedded queries. Using typical examples of
                 these queries, we study two representative caching
                 schemes within our framework: (i) traditional passive
                 query caching, and (ii) active query caching, in which
                 the proxy cache can service a request by evaluating a
                 query over the contents of the cache. Results from our
                 experimental implementation show that our form-based
                 proxy is a general and flexible approach that
                 efficiently enables active caching schemes for
                 database-backed web sites. Furthermore, handling query
                 containment at the proxy yields significant performance
                 advantages over passive query caching, but extending
                 the power of the active cache to do full semantic
                 caching appears to be less generally effective.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "database-backed Web sites; Web proxy caching",
}
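
%%% For conjunctive keyword queries, containment takes a simple form:
%%% a cached result for keyword set K can answer any query whose
%%% keyword set is a superset of K, by filtering the cached rows.  A
%%% Python sketch of this active-caching step (our own illustration,
%%% with rows modeled as text):
%%%
%%%     def answer_from_cache(cache, query_kws):
%%%         # cache: dict mapping frozenset of keywords -> list of rows
%%%         q = set(query_kws)
%%%         for cached_kws, rows in cache.items():
%%%             if set(cached_kws) <= q:
%%%                 extra = q - set(cached_kws)
%%%                 return [r for r in rows
%%%                         if all(k in r for k in extra)]
%%%         return None   # miss: forward the request to the server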

@Article{Wang:2008:EAM,
  author =       "Yida Wang and Ee-Peng Lim and San-Yih Hwang",
  title =        "Efficient algorithms for mining maximal valid groups",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "515--535",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A valid group is defined as a group of moving users
                 that are within a distance threshold from one another
                 for at least a minimum time duration. Unlike grouping
                 of users determined by traditional clustering
                 algorithms, members of a valid group are expected to
                 stay close to one another during their movement. Each
                 valid group suggests some social grouping that can be
                 used in targeted marketing and social network analysis.
                 The existing valid group mining algorithms are designed
                 to mine a complete set of valid groups from time series
                 of user location data, known as the user movement
                  database. Unfortunately, there is considerable
                  redundancy in the complete set of valid groups. In this
                  paper, we therefore address the problem of mining the
                 set of maximal valid groups. We first extend our
                 previous valid group mining algorithms to mine maximal
                 valid groups, leading to AMG and VGMax algorithms. We
                 further propose the VGBK algorithm based on maximal
                 clique enumeration to mine the maximal valid groups.
                 The performance results of these algorithms under
                 different sets of mining parameters are also
                 reported.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
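
%%% VGBK builds on maximal clique enumeration; the classic
%%% Bron--Kerbosch recursion it adapts is short enough to quote as a
%%% Python sketch (basic form, without pivoting):
%%%
%%%     def bron_kerbosch(R, P, X, adj, out):
%%%         # adj: node -> set of neighbors; call with R=set(),
%%%         # P=set(all nodes), X=set(), out=[]
%%%         if not P and not X:
%%%             out.append(set(R))          # R is a maximal clique
%%%             return
%%%         for v in list(P):
%%%             bron_kerbosch(R | {v}, P & adj[v], X & adj[v], adj, out)
%%%             P.remove(v)
%%%             X.add(v)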

@Article{Yu:2008:DMW,
  author =       "Qi Yu and Xumin Liu and Athman Bouguettaya and Brahim
                 Medjahed",
  title =        "Deploying and managing {Web} services: issues,
                 solutions, and directions",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "537--572",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Web services are expected to be the key technology in
                 enabling the next installment of the Web in the form of
                 the Service Web. In this paradigm shift, Web services
                 would be treated as first-class objects that can be
                 manipulated much like data is now manipulated using a
                 database management system. Hitherto, Web services have
                 largely been driven by standards. However, there is a
                 strong impetus for defining a solid and integrated
                 foundation that would facilitate the kind of
                 innovations witnessed in other fields, such as
                 databases. This survey focuses on investigating the
                 different research problems, solutions, and directions
                 to deploying Web services that are managed by an
                 integrated Web Service Management System (WSMS). The
                 survey identifies the key features of a WSMS and
                 conducts a comparative study on how current research
                 approaches and projects fit in.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "interoperability; service-oriented computing; Web
                 service management system",
}

@Article{Li:2008:EUD,
  author =       "Changqing Li and Tok Wang Ling and Min Hu",
  title =        "Efficient updates in dynamic {XML} data: from binary
                 string to quaternary string",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "3",
  pages =        "573--601",
  month =        may,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:29 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "XML query processing based on labeling schemes has
                 been thoroughly studied in the past several years.
                 Recently efficient processing of updates in dynamic XML
                  data has gained more attention. However, all the
                  existing techniques have high update cost: they cannot
                  completely avoid re-labeling on XML updates, and they
                  increase the label size, which degrades the query
                  performance. Thus, in this paper we propose a
                 novel Compact Dynamic Binary String (CDBS) encoding to
                 efficiently process updates. CDBS has two important
                 properties which form the foundations of this paper:
                  (1) CDBS codes can be inserted between any two
                  consecutive CDBS codes with order preserved and
                  without re-encoding the existing codes; (2) CDBS is
                 orthogonal to specific labeling schemes; thus it can be
                 applied broadly to different labeling schemes or other
                 applications to efficiently process updates. Moreover,
                 because CDBS will encounter the overflow problem, we
                 improve CDBS to Compact Dynamic Quaternary String
                 (CDQS) encoding which can completely avoid re-labeling
                 in XML leaf node updates no matter what the labeling
                 schemes are. Meanwhile, we also discuss how to
                 efficiently process internal node updates. We report
                 the experimental results to show that our CDBS and CDQS
                 are superior to previous approaches to process both
                 leaf node and internal node updates.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
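
%%% The heart of such encodings is generating a code lexicographically
%%% between two neighbors without touching either.  One simple rule in
%%% the spirit of CDBS, for binary codes that end in '1' (a Python
%%% sketch; the paper's exact encoding differs in its details):
%%%
%%%     def between(left, right):
%%%         # '' marks the open boundary on either side
%%%         if not right:
%%%             return (left + '1') if left else '1'
%%%         if not left or len(left) < len(right):
%%%             return right[:-1] + '01'    # step just below `right`
%%%         return left + '1'               # step just above `left`
%%%
%%%     # between('01', '1') == '011'; existing codes keep their order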

@Article{Tao:2007:MRK,
  author =       "Yufei Tao and Dimitris Papadias and Xiang Lian and
                 Xiaokui Xiao",
  title =        "Multidimensional reverse {kNN} search",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "3",
  pages =        "293--316",
  month =        jul,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:24 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a multidimensional point $q$, a reverse $k$
                 nearest neighbor (RkNN) query retrieves all the data
                 points that have $q$ as one of their $k$ nearest
                 neighbors. Existing methods for processing such queries
                 have at least one of the following deficiencies: they
                 (i) do not support arbitrary values of $k$, (ii) cannot
                 deal efficiently with database updates, (iii) are
                 applicable only to 2D data but not to higher
                 dimensionality, and (iv) retrieve only approximate
                 results. Motivated by these shortcomings, we develop
                 algorithms for exact RkNN processing with arbitrary
                 values of $k$ on dynamic, multidimensional datasets.
                 Our methods utilize a conventional data-partitioning
                 index on the dataset and do not require any
                 pre-computation. As a second step, we extend the
                 proposed techniques to continuous RkNN search, which
                 returns the RkNN results for every point on a line
                 segment. We evaluate the effectiveness of our
                 algorithms with extensive experiments using both real
                 and synthetic datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "continuous search; reverse nearest neighbor; spatial
                 database",
}
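
%%% The RkNN definition itself is easy to state as a brute-force
%%% Python baseline (quadratic, and precisely what the paper's
%%% index-based algorithms avoid):
%%%
%%%     import math
%%%
%%%     def rknn(points, q, k):
%%%         # points: list of coordinate tuples; q: query point
%%%         result = []
%%%         for p in points:
%%%             others = [r for r in points if r is not p] + [q]
%%%             others.sort(key=lambda r: math.dist(r, p))
%%%             if q in others[:k]:
%%%                 result.append(p)
%%%         return result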

@Article{Koch:2007:AGS,
  author =       "Christoph Koch and Stefanie Scherzinger",
  title =        "Attribute grammars for scalable query processing on
                 {XML} streams",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "3",
  pages =        "317--342",
  month =        jul,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:24 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We introduce the notion of XML Stream Attribute
                 Grammars (XSAGs). XSAGs are the first scalable query
                 language for XML streams (running strictly in linear
                 time with bounded memory consumption independent of the
                 size of the stream) that allows for actual data
                 transformations rather than just document filtering.
                 XSAGs are also relatively easy to use for humans.
                 Moreover, the XSAG formalism provides a strong
                 intuition for which queries can or cannot be processed
                 scalably on streams. We introduce XSAGs together with
                 the necessary language-theoretic machinery, study their
                 theoretical properties such as expressiveness and
                 complexity, and discuss their implementation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "attribute grammars; query languages; stream
                 processing; XML",
}

@Article{Chan:2007:OES,
  author =       "Edward P. Chan and Heechul Lim",
  title =        "Optimization and evaluation of shortest path queries",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "3",
  pages =        "343--369",
  month =        jul,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:24 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We investigate the problem of how to evaluate
                 efficiently a collection of shortest path queries on
                 massive graphs that are too big to fit in the main
                 memory. To evaluate a shortest path query efficiently,
                 we introduce two pruning algorithms. These algorithms
                 differ on the extent of materialization of shortest
                 path cost and on how the search space is pruned. By
                 grouping shortest path queries properly, batch
                 processing improves the performance of shortest path
                  query evaluation. We also study extensively the
                  fragment sizes, cache sizes, and query types that we
                  show affect the performance of a disk-based
                  shortest path algorithm. The performance and
                 scalability of proposed techniques are evaluated with
                 large road systems in the Eastern United States. To
                 demonstrate that the proposed disk-based algorithms are
                  viable, we show that their search times are
                  significantly better than that of the main-memory
                  Dijkstra algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "disk-based algorithms; graph algorithms; graph
                 pruning; query evaluation and optimization; route
                 queries; shortest path queries",
}
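
%%% The main-memory baseline referred to above is textbook Dijkstra;
%%% for concreteness, a heap-based Python version (the paper's
%%% disk-based algorithms add materialization and pruning on top):
%%%
%%%     import heapq
%%%
%%%     def dijkstra(graph, source):
%%%         # graph: node -> list of (neighbor, weight)
%%%         dist, heap = {source: 0}, [(0, source)]
%%%         while heap:
%%%             d, u = heapq.heappop(heap)
%%%             if d > dist.get(u, float('inf')):
%%%                 continue                    # stale heap entry
%%%             for v, w in graph.get(u, []):
%%%                 nd = d + w
%%%                 if nd < dist.get(v, float('inf')):
%%%                     dist[v] = nd
%%%                     heapq.heappush(heap, (nd, v))
%%%         return dist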

@Article{Lee:2007:DPI,
  author =       "Jae-Gil Lee and Kyu-Young Whang and Wook-Shin Han and
                 Il-Yeol Song",
  title =        "The dynamic predicate: integrating access control with
                 query processing in {XML} databases",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "3",
  pages =        "371--387",
  month =        jul,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:24 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Recently, access control on XML data has become an
                 important research topic. Previous research on access
                 control mechanisms for XML data has focused on
                 increasing the efficiency of access control itself, but
                 has not addressed the issue of integrating access
                 control with query processing. In this paper, we
                 propose an efficient access control mechanism tightly
                 integrated with query processing for XML databases. We
                 present the novel concept of the dynamic predicate
                  (DP), which represents a dynamically
                 constructed condition during query execution. A DP is
                 derived from instance-level authorizations and
                 constrains accessibility of the elements. The DP allows
                 us to effectively integrate authorization checking into
                 the query plan so that unauthorized elements are
                 excluded in the process of query execution.
                 Experimental results show that the proposed access
                 control mechanism improves query processing time
                 significantly over the state-of-the-art access control
                 mechanisms. We conclude that the DP is highly effective
                 in efficiently checking instance-level authorizations
                 in databases with hierarchical structures.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access control; privacy/security; query processing;
                 XML databases",
}

@Article{Papazoglou:2007:SOA,
  author =       "Mike P. Papazoglou and Willem-Jan Heuvel",
  title =        "Service oriented architectures: approaches,
                 technologies and research issues",
  journal =      j-VLDB-J,
  volume =       "16",
  number =       "3",
  pages =        "389--415",
  month =        jul,
  year =         "2007",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:24 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Service-oriented architectures (SOA) is an emerging
                 approach that addresses the requirements of loosely
                 coupled, standards-based, and protocol-independent
                 distributed computing. Typically business operations
                 running in an SOA comprise a number of invocations of
                 these different components, often in an event-driven or
                 asynchronous fashion that reflects the underlying
                 business process needs. To build an SOA a highly
                 distributable communications and integration backbone
                 is required. This functionality is provided by the
                  Enterprise Service Bus (ESB), an integration
                 platform that utilizes Web services standards to
                 support a wide variety of communications patterns over
                 multiple transport protocols and deliver value-added
                 capabilities for SOA applications. This paper reviews
                 technologies and approaches that unify the principles
                 and concepts of SOA with those of event-based
                 programming. The paper also focuses on the ESB and
                 describes a range of functions that are designed to
                 offer a manageable, standards-based SOA backbone that
                  extends middleware functionality throughout by
                  connecting heterogeneous components and systems and by
                  offering integration services. Finally, the paper
                 proposes an approach to extend the conventional SOA to
                 cater for essential ESB requirements that include
                  capabilities such as service orchestration,
                  `intelligent' routing, provisioning, message integrity
                  and security, as well as service management. The
                 layers in this extended SOA, in short xSOA, are used to
                 classify research issues and current research
                 activities.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "application and service integration; asynchronous and
                 event-driven processing; enterprise bus; service
                 oriented architecture; Web services",
}

@Article{Byun:2008:PBA,
  author =       "Ji-Won Byun and Ninghui Li",
  title =        "Purpose based access control for privacy protection in
                 relational database systems",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "603--619",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this article, we present a comprehensive approach
                 for privacy preserving access control based on the
                 notion of purpose. In our model, purpose information
                 associated with a given data element specifies the
                 intended use of the data element. A key feature of our
                 model is that it allows multiple purposes to be
                 associated with each data element and also supports
                 explicit prohibitions, thus allowing privacy officers
                 to specify that some data should not be used for
                 certain purposes. An important issue addressed in this
                 article is the granularity of data labeling, i.e., the
                 units of data with which purposes can be associated. We
                 address this issue in the context of relational
                 databases and propose four different labeling schemes,
                 each providing a different granularity. We also propose
                 an approach to represent purpose information, which
                 results in low storage overhead, and we exploit query
                 modification techniques to support access control based
                 on purpose information. Another contribution of our
                 work is that we address the problem of how to determine
                 the purpose for which certain data are accessed by a
                 given user. Our proposed solution relies on role-based
                 access control (RBAC) models as well as the notion of
                 conditional role which is based on the notions of role
                 attribute and system attribute.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "access control; privacy; private data management;
                 purpose",
}
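
%%% The query-modification technique lends itself to a small sketch
%%% (Python; the table, column, and purpose names are illustrative
%%% assumptions): each tuple carries a bitmask of intended purposes,
%%% and a query issued for access purpose p is rewritten to keep only
%%% tuples whose label contains p.
%%%
%%%    MARKETING, ANALYSIS, ADMIN = 1, 2, 4  # hypothetical purpose bits
%%%
%%%    def modify_query(cols, table, where, purpose,
%%%                     label_col="allowed_purposes"):
%%%        # Append the purpose predicate to a simple SELECT.
%%%        pred = f"({label_col} & {purpose}) != 0"
%%%        where = f"({where}) AND {pred}" if where else pred
%%%        return f"SELECT {cols} FROM {table} WHERE {where}"
%%%
%%%    print(modify_query("name, email", "customers", "age >= 18",
%%%                       MARKETING))
%%%    # SELECT name, email FROM customers
%%%    #   WHERE (age >= 18) AND ((allowed_purposes & 1) != 0)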

@Article{Karayannidis:2008:HCO,
  author =       "Nikos Karayannidis and Timos Sellis",
  title =        "Hierarchical clustering for {OLAP}: the {CUBE File}
                 approach",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "621--655",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper deals with the problem of physical
                 clustering of multidimensional data that are organized
                 in hierarchies on disk in a hierarchy-preserving
                 manner. This is called hierarchical clustering. A
                 typical case, where hierarchical clustering is
                 necessary for reducing I/Os during query evaluation, is
                 the most detailed data of an OLAP cube. The presence of
                 hierarchies in the multidimensional space results in an
                 enormous search space for this problem. We propose a
                 representation of the data space that results in a
                 chunk-tree representation of the cube. The model is
                 adaptive to the cube's extensive sparseness and
                 provides efficient access to subsets of data based on
                 hierarchy value combinations. Based on this
                 representation of the search space we formulate the
                 problem as a chunk-to-bucket allocation problem, which
                 is a packing problem as opposed to the linear ordering
                 approach followed in the literature.\par

                 We propose a metric to evaluate the quality of
                 hierarchical clustering achieved (i.e., evaluate the
                 solutions to the problem) and formulate the problem as
                 an optimization problem. We prove its NP-Hardness and
                 provide an effective solution based on a linear time
                 greedy algorithm. The solution of this problem leads to
                 the construction of the CUBE File data structure. We
                 analyze in depth all steps of the construction and
                 provide solutions for interesting sub-problems arising,
                 such as the formation of bucket-regions, the storage of
                 large data chunks and the caching of the upper nodes
                 (root directory) in main memory.\par

                 Finally, we provide an extensive experimental
                 evaluation of the CUBE File's adaptability to the data
                 space sparseness as well as to an increasing number of
                 data points. The main result is that the CUBE File is
                 highly adaptive to even the most sparse data spaces and
                 for realistic cases of data point cardinalities
                 provides hierarchical clustering of high quality and
                 significant space savings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "CUBE File; data cube; hierarchical clustering; OLAP;
                 Physical data clustering",
}
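
%%% The chunk-to-bucket allocation can be caricatured with a greedy
%%% packing sketch (far simpler than the paper's linear-time greedy
%%% algorithm; tree shape and sizes are illustrative): a whole chunk
%%% subtree is placed in the current bucket when it fits, otherwise
%%% the split recurses along the hierarchy so that hierarchically
%%% related chunks tend to share buckets.  Chunks larger than a bucket
%%% with no children would need the paper's separate large-chunk
%%% treatment.
%%%
%%%    def allocate(node, bucket_size, buckets):
%%%        # node = (subtree_size, [children]); bucket = [used, nodes]
%%%        size, children = node
%%%        if size <= bucket_size:         # keep the subtree clustered
%%%            if not buckets or buckets[-1][0] + size > bucket_size:
%%%                buckets.append([0, []])
%%%            buckets[-1][0] += size
%%%            buckets[-1][1].append(node)
%%%        else:                           # too large: split by hierarchy
%%%            for child in children:
%%%                allocate(child, bucket_size, buckets)
%%%        return buckets
%%%
%%%    tree = (10, [(6, [(3, []), (3, [])]), (4, [])])
%%%    print([b[0] for b in allocate(tree, 8, [])])   # [6, 4]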

@Article{Plattner:2008:EDS,
  author =       "Christian Plattner and Gustavo Alonso and M. Tamer
                 {\"O}zsu",
  title =        "Extending {DBMSs} with satellite databases",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "657--682",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we propose an extensible architecture
                 for database engines where satellite databases are used
                 to scale out and implement additional functionality for
                 a centralized database engine. The architecture uses a
                 middleware layer that offers consistent views and a
                 single system image over a cluster of machines with
                 database engines. One of these engines acts as a master
                 copy while the others are read-only snapshots which we
                 call satellites. The satellites are lightweight DBMSs
                 used for scalability and to provide functionality
                 difficult or expensive to implement in the main engine.
                 Our approach also supports the dynamic creation of
                 satellites to be able to autonomously adapt to varying
                 loads. The paper presents the architecture, discusses
                 the research problems it raises, and validates its
                 feasibility with extensive experimental results.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "dynamic satellite creation; extending database
                 functionality; satellite databases; snapshot
                 isolation",
}
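
%%% A toy router conveys the master/satellite split (the freshness rule
%%% and names are illustrative assumptions, not the paper's middleware):
%%% writes go to the master copy; reads may be served by any snapshot
%%% satellite that is fresh enough for the client.
%%%
%%%    import random
%%%
%%%    def route(sql, master, satellites, max_staleness=0):
%%%        if not sql.lstrip().upper().startswith("SELECT"):
%%%            return master                        # all updates: master
%%%        fresh = [s for s in satellites if s["lag"] <= max_staleness]
%%%        return random.choice(fresh) if fresh else master
%%%
%%%    sats = [{"name": "sat1", "lag": 0}, {"name": "sat2", "lag": 5}]
%%%    print(route("UPDATE t SET x=1", {"name": "m"}, sats)["name"])  # m
%%%    print(route("SELECT * FROM t", {"name": "m"}, sats)["name"])   # sat1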

@Article{Hsieh:2008:DEF,
  author =       "Ming-Jyh Hsieh and Wei-Guang Teng and Ming-Syan Chen
                 and Philip S. Yu",
  title =        "{DAWN}: an efficient framework of {DCT} for data with
                 error estimation",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "683--702",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "On-line analytical processing (OLAP) has become an
                 important component in most data warehouse systems and
                 decision support systems in recent years. In order to
                 deal with the huge amount of data, highly complex
                 queries and increasingly strict response time
                 requirements, approximate query processing has been
                 deemed a viable solution. Most works in this area,
                 however, focus on the space efficiency and are unable
                 to provide quality-guaranteed answers to queries. To
                 remedy this, in this paper, we propose an efficient
                 framework of DCT for dAta With error estimatioN, called
                 DAWN, which focuses on answering range-sum queries from
                 compressed OP-cubes transformed by DCT. Specifically,
                  utilizing the techniques of geometric series and
                 Euler's formula, we devise a robust summation function,
                 called the GE function, to answer range queries in
                 constant time, regardless of the number of data cells
                 involved. Note that the GE function can estimate the
                 summation of cosine functions precisely; thus the
                 quality of the answers is superior to that of previous
                 works. Furthermore, an estimator of errors based on the
                 Brown noise assumption (BNA) is devised to provide
                 tight bounds for answering range-sum queries. Our
                 experiment results show that the DAWN framework is
                 scalable to the selectivity of queries and the
                 available storage space. With GE functions and the BNA
                 method, the DAWN framework not only delivers high
                 quality answers for range-sum queries, but also leads
                 to shorter query response time due to its effectiveness
                 in error estimation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
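
%%% The constant-time claim for the GE function rests on the standard
%%% closed form for a sum of cosines, obtained from the geometric
%%% series and Euler's formula; the textbook identity below (not quoted
%%% from the paper) shows why a contiguous range-sum over DCT terms
%%% needs no per-cell work:
%%%
%%%    \sum_{x=0}^{n-1} \cos(x\theta + \phi)
%%%        = \frac{\sin(n\theta/2)}{\sin(\theta/2)}
%%%          \cos\Bigl(\phi + \frac{(n-1)\theta}{2}\Bigr),
%%%    \qquad \theta \not\equiv 0 \pmod{2\pi},
%%%
%%% so the sum is evaluated in O(1) regardless of the number of data
%%% cells involved.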

@Article{Atzori:2008:APP,
  author =       "Maurizio Atzori and Francesco Bonchi and Fosca
                 Giannotti and Dino Pedreschi",
  title =        "Anonymity preserving pattern discovery",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "703--727",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "It is generally believed that data mining results do
                 not violate the anonymity of the individuals recorded
                 in the source database. In fact, data mining models and
                 patterns, in order to ensure a required statistical
                 significance, represent a large number of individuals
                 and thus conceal individual identities: this is the
                 case of the minimum support threshold in frequent
                 pattern mining. In this paper we show that this belief
                 is ill-founded. By shifting the concept of
                 $k$-anonymity from the source data to the extracted
                 patterns, we formally characterize the notion of a
                 threat to anonymity in the context of pattern
                 discovery, and provide a methodology to efficiently and
                 effectively identify all such possible threats that
                 arise from the disclosure of the set of extracted
                 patterns. On this basis, we obtain a formal notion of
                 privacy protection that allows the disclosure of the
                 extracted knowledge while protecting the anonymity of
                 the individuals in the source database. Moreover, in
                 order to handle the cases where the threats to
                 anonymity cannot be avoided, we study how to eliminate
                 such threats by means of pattern (not data!) distortion
                 performed in a controlled way.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "anonymity; frequent pattern mining; individual
                 privacy; knowledge discovery; privacy preserving data
                 mining",
}
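
%%% A much-simplified special case of the paper's inference channels
%%% can be checked directly (supports below are illustrative): a pair
%%% of patterns I subset-of J threatens k-anonymity when the
%%% transactions that support I but not J number between 1 and k-1,
%%% since publishing both supports then isolates fewer than k
%%% individuals.
%%%
%%%    def pairwise_threats(supports, k):
%%%        # supports: dict frozenset -> support count
%%%        return [(set(I), set(J), sI - sJ)
%%%                for I, sI in supports.items()
%%%                for J, sJ in supports.items()
%%%                if I < J and 0 < sI - sJ < k]
%%%
%%%    sup = {frozenset("a"): 10, frozenset("ab"): 8}
%%%    print(pairwise_threats(sup, 5))  # [({'a'}, {'a', 'b'}, 2)]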

@Article{Morfonios:2008:SDC,
  author =       "Konstantinos Morfonios and Yannis Ioannidis",
  title =        "Supporting the data cube lifecycle: the power of
                 {ROLAP}",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "729--764",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The lifecycle of a data cube involves efficient
                 construction and storage, fast query answering, and
                 incremental updating. Existing ROLAP methods that
                 implement data cubes are weak with respect to one or
                 more of the above, focusing mainly on construction and
                 storage. In this paper, we present a comprehensive
                  ROLAP solution that efficiently addresses all
                 functionality in the lifecycle of a cube and can be
                 implemented easily over existing relational servers. It
                 is a family of algorithms centered around a purely
                 ROLAP construction method that provides fast
                 computation of a fully materialized cube in compressed
                 form, is incrementally updatable, and exhibits quick
                 query response times that can be improved by low-cost
                 indexing and caching. This is demonstrated through
                 comprehensive experiments on both synthetic and
                 real-world datasets, whose results have shown great
                 promise for the performance and scalability potential
                 of the proposed techniques, with respect to both the
                 size and dimensionality of the fact table.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "caching; compressed storage; data cube; incremental
                 updating; indexing; query processing; ROLAP",
}
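
%%% For orientation, the naive full-cube computation that such ROLAP
%%% methods improve on (via shared work, compression, and incremental
%%% maintenance) is a group-by over every dimension subset; a
%%% brute-force sketch with illustrative fact rows:
%%%
%%%    from itertools import combinations
%%%    from collections import defaultdict
%%%
%%%    def full_cube(rows, dims):
%%%        cube = {}
%%%        for r in range(len(dims) + 1):
%%%            for gb in combinations(dims, r):
%%%                agg = defaultdict(int)
%%%                for row in rows:
%%%                    agg[tuple(row[d] for d in gb)] += row["m"]
%%%                cube[gb] = dict(agg)
%%%        return cube
%%%
%%%    rows = [{"d1": "a", "d2": "x", "m": 1},
%%%            {"d1": "a", "d2": "y", "m": 2}]
%%%    print(full_cube(rows, ("d1", "d2"))[("d1",)])  # {('a',): 3}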

@Article{Sharifzadeh:2008:OSR,
  author =       "Mehdi Sharifzadeh and Mohammad Kolahdouzan and Cyrus
                 Shahabi",
  title =        "The optimal sequenced route query",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "765--787",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Real-world road-planning applications often result in
                 the formulation of new variations of the nearest
                 neighbor (NN) problem requiring new solutions. In this
                  paper, we study an unexplored form of NN queries named
                  the optimal sequenced route (OSR) query in both vector
                  and metric spaces. OSR strives to find a route of minimum
                 length starting from a given source location and
                 passing through a number of typed locations in a
                 particular order imposed on the types of the locations.
                 We first transform the OSR problem into a shortest path
                 problem on a large planar graph. We show that a classic
                 shortest path algorithm such as Dijkstra's is
                 impractical for most real-world scenarios. Therefore,
                 we propose LORD, a light threshold-based iterative
                 algorithm, which utilizes various thresholds to prune
                 the locations that cannot belong to the optimal route.
                 Then we propose R-LORD, an extension of LORD which uses
                 R-tree to examine the threshold values more
                  efficiently. Finally, for applications that cannot
                  tolerate the Euclidean distance as an estimate and
                  require exact distance measures in metric spaces (e.g.,
                  road networks), we propose PNE, which progressively issues
                 NN queries on different point types to construct the
                 optimal route for the OSR query. Our extensive
                 experiments on both real-world and synthetic datasets
                 verify that our algorithms significantly outperform a
                 disk-based variation of the Dijkstra approach in terms
                 of processing time (up to two orders of magnitude) and
                 required workspace (up to 90\% reduction on average).",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "nearest neighbor search; spatial databases; trip
                 planning queries",
}
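
%%% The layered-graph view of OSR admits a tiny dynamic program in the
%%% plane (brute force; the paper's LORD/R-LORD exist to prune exactly
%%% this search; points and types are illustrative):
%%%
%%%    from math import dist
%%%
%%%    def osr(source, layers):
%%%        # layers: one list of candidate points per required type,
%%%        # in the imposed visiting order; returns (length, route).
%%%        best = {source: (0.0, [source])}
%%%        for layer in layers:
%%%            best = {p: min((d + dist(q, p), path + [p])
%%%                           for q, (d, path) in best.items())
%%%                    for p in layer}
%%%        return min(best.values())
%%%
%%%    print(osr((0, 0), [[(1, 0), (0, 2)], [(2, 1)]]))
%%%    # (2.414..., [(0, 0), (1, 0), (2, 1)])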

@Article{Friedman:2008:PAD,
  author =       "Arik Friedman and Ran Wolff and Assaf Schuster",
  title =        "Providing $k$-anonymity in data mining",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "789--804",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper we present extended definitions of
                 $k$-anonymity and use them to prove that a given data
                 mining model does not violate the $k$-anonymity of the
                 individuals represented in the learning examples. Our
                 extension provides a tool that measures the amount of
                 anonymity retained during data mining. We show that our
                 model can be applied to various data mining problems,
                 such as classification, association rule mining and
                 clustering. We describe two data mining algorithms
                 which exploit our extension to guarantee they will
                 generate only $k$-anonymous output, and provide
                 experimental results for one of them. Finally, we show
                 that our method contributes new and efficient ways to
                 anonymize data and preserve patterns during
                 anonymization.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
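
%%% A sketch in the spirit of (not identical to) the paper's extended
%%% definitions: treat a model as k-anonymous w.r.t. its training data
%%% if every distinguishable cell it induces over the quasi-identifiers
%%% covers at least k records.  The cell function stands in for a real
%%% model, e.g. the leaf reached in a decision tree.
%%%
%%%    from collections import Counter
%%%
%%%    def is_k_anonymous(records, cell_of, k):
%%%        counts = Counter(cell_of(r) for r in records)
%%%        return all(c >= k for c in counts.values())
%%%
%%%    recs = [(25, "M"), (26, "M"), (31, "F"), (33, "F")]
%%%    cell = lambda r: (r[0] // 10, r[1])   # age decade x gender
%%%    print(is_k_anonymous(recs, cell, 2))  # True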

@Article{Harder:2008:VCC,
  author =       "Theo H{\"a}rder and Andreas B{\"u}hmann",
  title =        "Value complete, column complete, predicate complete",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "805--826",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Caching is a proven remedy to enhance scalability and
                 availability of software systems as well as to reduce
                 latency of user requests. In contrast to Web caching
                 where single Web objects are accessed and kept ready
                 somewhere in caches in the user-to-server path,
                 database caching uses full-fledged database management
                 systems as caches, close to application servers at the
                 edge of the Web, to adaptively maintain sets of records
                 from a remote database and to evaluate queries on them.
                 We analyze a new class of approaches to database
                 caching where the extensions of query predicates that
                 are to be evaluated are constructed by constraints in
                 the cache. Starting from the key concept of value
                 completeness, we explore the application of cache
                 constraints and their implications on query evaluation
                 correctness and on controllable cache loading called
                 cache safeness. Furthermore, we identify simple rules
                 for the design of cache groups and their optimization
                 before discussing the use of single cache groups and
                 cache group federations. Finally, we argue that
                 predicate completeness can be used to develop new
                 variants of constraint-based database caching.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "cache constraints; database caching; predicate
                 completeness; query processing",
}
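
%%% The key property can be shown on a toy instance (comparison against
%%% the backend appears here only to illustrate the definition; a real
%%% cache guarantees the property constructively through cache
%%% constraints): the predicate city = v may be evaluated locally only
%%% if the cache holds every backend row with that value.
%%%
%%%    backend = [{"city": "Rome", "id": 1}, {"city": "Rome", "id": 2},
%%%               {"city": "Oslo", "id": 3}]
%%%    cache = [{"city": "Rome", "id": 1}, {"city": "Rome", "id": 2}]
%%%
%%%    def value_complete(value, col="city"):
%%%        rows = [r for r in backend if r[col] == value]
%%%        return all(r in cache for r in rows)
%%%
%%%    print(value_complete("Rome"), value_complete("Oslo"))  # True False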

@Article{Ou:2008:EAI,
  author =       "Jian-Chih Ou and Chang-Hung Lee and Ming-Syan Chen",
  title =        "Efficient algorithms for incremental {Web} log mining
                 with dynamic thresholds",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "827--845",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the fast increase in Web activities, Web data
                 mining has recently become an important research topic
                 and is receiving a significant amount of interest from
                 both academic and industrial environments. While
                 existing methods are efficient for the mining of
                 frequent path traversal patterns from the access
                 information contained in a log file, these approaches
                  are likely to over-evaluate associations. Explicitly,
                 most previous studies of mining path traversal patterns
                 are based on the model of a uniform support threshold,
                 where a single support threshold is used to determine
                 frequent traversal patterns without taking into
                 consideration such important factors as the length of a
                 pattern, the positions of Web pages, and the importance
                 of a particular pattern, etc. As a result, a low
                 support threshold will lead to lots of uninteresting
                 patterns derived whereas a high support threshold may
                 cause some interesting patterns with lower supports to
                 be ignored. In view of this, this paper broadens the
                 horizon of frequent path traversal pattern mining by
                 introducing a flexible model of mining Web traversal
                  patterns with dynamic thresholds. Specifically, we
                  study and apply the Markov chain model to determine
                  the support threshold of Web documents;
                 and further, by properly employing some effective
                 techniques devised for joining reference sequences, the
                 proposed algorithm dynamic threshold miner (DTM) not
                 only possesses the capability of mining with dynamic
                 thresholds, but also significantly improves the
                 execution efficiency as well as contributes to the
                 incremental mining of Web traversal patterns.
                 Performance of algorithm DTM and the extension of
                 existing methods is comparatively analyzed with
                  synthetic and real Web logs. It is shown that
                  algorithm DTM is very advantageous in reducing the
                  number of unnecessary rules produced and leads to a
                  prominent performance improvement.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "dynamic support threshold; Web mining path traversal
                 pattern",
}
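
%%% The Markov-chain ingredient reduces to computing stationary visit
%%% probabilities; a hedged sketch (power iteration; the per-page
%%% scaling rule below is illustrative, not the paper's formula):
%%%
%%%    def stationary(P, iters=100):
%%%        # P: row-stochastic transition matrix (list of lists).
%%%        n = len(P)
%%%        pi = [1.0 / n] * n
%%%        for _ in range(iters):
%%%            pi = [sum(pi[i] * P[i][j] for i in range(n))
%%%                  for j in range(n)]
%%%        return pi
%%%
%%%    P = [[0.1, 0.9], [0.5, 0.5]]
%%%    pi = stationary(P)                  # ~[0.357, 0.643]
%%%    base = 0.02
%%%    print([round(base * p * len(pi), 4) for p in pi])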

@Article{Alagic:2008:GJP,
  author =       "Suad Alagi{\'c} and Mark Royer",
  title =        "Genericity in {Java}: persistent and database systems
                 implications",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "847--878",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Lack of parametric polymorphism has been a major
                 obstacle for making Java a viable database programming
                 language. Regrettably, a recently accepted solution for
                 genericity in Java 5.0 has far-reaching negative
                 implications for persistent and database systems
                 because of static and dynamic type violations. Severe
                 implications occur in typical database transactions
                 when processing a variety of database collections.
                 Well-known approaches to persistence in Java, including
                 Java's own persistence mechanism, do not perform
                 correctly due to incorrect dynamic type information
                 that gets promoted to persistence along with objects.
                 Dynamic checking of types of objects fetched from the
                 persistent store may now lead to unexpected type
                 violations. Further problems occur in reflective
                 transactions as Java Core Reflection now allows dynamic
                 type violations without detecting them or throwing
                 standard exceptions. All of this shows that extending
                 Java with parametric polymorphism has not made Java a
                 more viable database programming language. Both legacy
                 systems, such as those based on the Java binding of the
                 ODMG or JDO, and future Java-related persistent and
                 database technologies will be affected. The source of
                 these problems is in an implementation idiom called
                 type erasure. This paper provides formal proofs of the
                 above implications of type erasure along with specific
                 samples of code in Java 5.0 illustrating these
                 violations. The limitations of the virtual platform and
                 extensions required for persistent systems to solve
                 this problem are also elaborated.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Java; object persistence; object-oriented databases;
                 reflection; transactions; type systems; virtual
                 platforms",
}

@Article{Vaidya:2008:PPN,
  author =       "Jaideep Vaidya and Murat Kantarc{\i}o{\u{g}}lu and
                 Chris Clifton",
  title =        "Privacy-preserving {Na{\"\i}ve Bayes} classification",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "879--898",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Privacy-preserving data mining--developing models
                 without seeing the data --- is receiving growing
                 attention. This paper assumes a privacy-preserving
                 distributed data mining scenario: data sources
                 collaborate to develop a global model, but must not
                 disclose their data to others. The problem of secure
                 distributed classification is an important one. In many
                 situations, data is split between multiple
                 organizations. These organizations may want to utilize
                 all of the data to create more accurate predictive
                 models while revealing neither their training
                 data/databases nor the instances to be classified.
                 Na{\"\i}ve Bayes is often used as a baseline
                 classifier, consistently providing reasonable
                 classification performance. This paper brings
                 privacy-preservation to that baseline, presenting
                 protocols to develop a Na{\"\i}ve Bayes classifier on
                 both vertically as well as horizontally partitioned
                 data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data mining; Distributed computing; Na{\"\i} Privacy;
                 Security; ve Bayes",
}
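
%%% The horizontally partitioned case has a simple non-private skeleton
%%% (illustrative data; the paper's contribution is to replace the
%%% plain count sums below with secure-summation protocols so that no
%%% party reveals its local counts):
%%%
%%%    from collections import Counter
%%%
%%%    def local_counts(rows):            # rows: (features, label)
%%%        cls, feat = Counter(), Counter()
%%%        for x, y in rows:
%%%            cls[y] += 1
%%%            for i, v in enumerate(x):
%%%                feat[(y, i, v)] += 1
%%%        return cls, feat
%%%
%%%    p1 = local_counts([((1, 0), "spam"), ((0, 0), "ham")])
%%%    p2 = local_counts([((1, 1), "spam")])
%%%    cls = p1[0] + p2[0]                # secure sum in the protocol
%%%    feat = p1[1] + p2[1]
%%%    print(feat[("spam", 0, 1)] / cls["spam"])  # P(x0=1|spam) = 1.0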

@Article{Fu:2008:STW,
  author =       "Ada Wai-Chee Fu and Eamonn Keogh and Leo Yung Lau and
                 Chotirat Ann Ratanamahatana and Raymond Chi-Wing Wong",
  title =        "Scaling and time warping in time series querying",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "899--921",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The last few years have seen an increasing
                 understanding that dynamic time warping (DTW), a
                 technique that allows local flexibility in aligning
                 time series, is superior to the ubiquitous Euclidean
                 distance for time series classification, clustering,
                  and indexing. More recently, it has been shown that
                  uniform scaling (US), a technique that allows global
                  scaling of time series, may be just as important for
                  some problems. In this work, we note that
                 for many real world problems, it is necessary to
                 combine both DTW and US to achieve meaningful results.
                 This is particularly true in domains where we must
                 account for the natural variability of human actions,
                 including biometrics, query by humming,
                 motion-capture/animation, and handwriting recognition.
                  We introduce the first technique which can handle both
                  DTW and US simultaneously; our technique involves
                  search pruning by means of a lower bounding technique
                  and multi-dimensional indexing to speed up the search.
                 We demonstrate the utility and effectiveness of our
                 method on a wide range of problems in industry,
                 medicine, and entertainment.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "dynamic time warping; nearest neighbor search; scaled
                 and warped matching; subsequence matching; uniform
                 scaling",
}
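
%%% The brute-force baseline that the paper's lower bound and index
%%% prune: stretch the query to every admissible length (uniform
%%% scaling), then run standard DTW.  Series and the scale grid are
%%% illustrative.
%%%
%%%    def dtw(a, b):
%%%        INF = float("inf")
%%%        D = [[INF] * (len(b) + 1) for _ in range(len(a) + 1)]
%%%        D[0][0] = 0.0
%%%        for i in range(1, len(a) + 1):
%%%            for j in range(1, len(b) + 1):
%%%                c = (a[i - 1] - b[j - 1]) ** 2
%%%                D[i][j] = c + min(D[i - 1][j], D[i][j - 1],
%%%                                  D[i - 1][j - 1])
%%%        return D[len(a)][len(b)]
%%%
%%%    def scale(q, m):                   # uniform rescaling to length m
%%%        return [q[int(i * len(q) / m)] for i in range(m)]
%%%
%%%    def us_dtw(q, c, scales=(0.8, 0.9, 1.0, 1.1, 1.2)):
%%%        return min(dtw(scale(q, max(2, round(len(q) * s))), c)
%%%                   for s in scales)
%%%
%%%    print(us_dtw([0, 1, 2, 1, 0], [0, 1, 2, 2, 1, 0]))  # 0.0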

@Article{Mouratidis:2008:TBP,
  author =       "Kyriakos Mouratidis and Dimitris Papadias and Spiros
                 Papadimitriou",
  title =        "Tree-based partition querying: a methodology for
                 computing medoids in large spatial datasets",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "923--945",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Besides traditional domains (e.g., resource
                 allocation, data mining applications), algorithms for
                 medoid computation and related problems will play an
                 important role in numerous emerging fields, such as
                 location based services and sensor networks. Since the
                 $k$-medoid problem is NP-hard, all existing work deals
                 with approximate solutions on relatively small
                 datasets. This paper aims at efficient methods for very
                 large spatial databases, motivated by: (1) the high and
                 ever increasing availability of spatial data, and (2)
                 the need for novel query types and improved services.
                 The proposed solutions exploit the intrinsic grouping
                 properties of a data partition index in order to read
                 only a small part of the dataset. Compared to previous
                 approaches, we achieve results of comparable or better
                 quality at a small fraction of the CPU and I/O costs
                 (seconds as opposed to hours, and tens of node accesses
                 instead of thousands). In addition, we study
                 medoid-aggregate queries, where $k$ is not known in
                 advance, but we are asked to compute a medoid set that
                 leads to an average distance close to a user-specified
                 value. Similarly, medoid-optimization queries aim at
                 minimizing both the number of medoids $k$ and the
                 average distance. We also consider the max version for
                 the aforementioned problems, where the goal is to
                 minimize the maximum (instead of the average) distance
                 between any object and its closest medoid. Finally, we
                 investigate bichromatic and weighted medoid versions
                  for all query types, as well as maximum capacity and
                 dynamic medoids.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "medoid queries; query processing; spatial databases",
}
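
%%% A greedy flavor of the underlying optimization, on a toy dataset
%%% (the paper's point is to draw candidates from a data partition
%%% index instead of scanning all points; data here are illustrative):
%%%
%%%    from math import dist
%%%
%%%    def cost(points, medoids):
%%%        return sum(min(dist(p, m) for m in medoids) for p in points)
%%%
%%%    def greedy_medoids(points, k):
%%%        medoids = []
%%%        for _ in range(k):
%%%            best = min((p for p in points if p not in medoids),
%%%                       key=lambda p: cost(points, medoids + [p]))
%%%            medoids.append(best)
%%%        return medoids
%%%
%%%    pts = [(0, 0), (0, 1), (10, 10), (10, 11)]
%%%    print(greedy_medoids(pts, 2))   # one medoid per natural cluster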

@Article{Yu:2008:DMP,
  author =       "Jeffrey Xu Yu and Zhiheng Li and Guimei Liu",
  title =        "A data mining proxy approach for efficient frequent
                 itemset mining",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "4",
  pages =        "947--970",
  month =        jul,
  year =         "2008",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Jun 23 10:51:30 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Data mining has attracted a lot of research efforts
                 during the past decade. However, little work has been
                 reported on the efficiency of supporting a large number
                 of users who issue different data mining queries
                 periodically when there are new needs and when data is
                 updated. Our work is motivated by the fact that the
                 pattern-growth method is one of the most efficient
                 methods for frequent pattern mining which constructs an
                 initial tree and mines frequent patterns on top of the
                 tree. In this paper, we present a data mining proxy
                 approach that can reduce the I/O costs to construct an
                  initial tree by utilizing the trees that are already
                  resident in memory. The tree we construct is the
                 smallest for a given data mining query. In addition,
                 our proxy approach can also reduce CPU cost in mining
                 patterns, because the cost of mining relies on the
                 sizes of trees. The focus of the work is to construct
                 an initial tree efficiently. We propose three tree
                 operations to construct a tree. With a unique coding
                 scheme, we can efficiently project subtrees from
                 on-disk trees or in-memory trees. Our performance study
                 indicated that the data mining proxy significantly
                 reduces the I/O cost to construct trees and CPU cost to
                 mine patterns over the trees constructed.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
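
%%% The reusable in-memory structure is a prefix tree over canonically
%%% ordered transactions; a minimal sketch of construction and subtree
%%% projection (transactions illustrative; the paper adds a coding
%%% scheme and three tree operations on top of this idea):
%%%
%%%    def build_tree(transactions):
%%%        root = {}
%%%        for t in transactions:
%%%            node = root
%%%            for item in sorted(t):     # canonical item order
%%%                node = node.setdefault(item, {})
%%%        return root
%%%
%%%    def project(tree, item):
%%%        # subtree for mining restricted to prefixes through `item`
%%%        return tree.get(item, {})
%%%
%%%    db = [["a", "b"], ["a", "c"], ["a", "b", "c"]]
%%%    print(project(build_tree(db), "a"))
%%%    # {'b': {'c': {}}, 'c': {}}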

@Article{Mokbel:2008:SSL,
  author =       "Mohamed F. Mokbel and Walid G. Aref",
  title =        "{SOLE}: scalable on-line execution of continuous
                 queries on spatio-temporal data streams",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "971--995",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0046-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper presents the scalable on-line execution
                 (SOLE) algorithm for continuous and on-line evaluation
                 of concurrent continuous spatio-temporal queries over
                 data streams. Incoming spatio-temporal data streams are
                 processed in-memory against a set of outstanding
                 continuous queries. The SOLE algorithm utilizes the
                 scarce memory resource efficiently by keeping track of
                 only the significant objects. In-memory stored objects
                 are expired (i.e., dropped) from memory once they
                 become insignificant. SOLE is a scalable algorithm
                 where all the continuous outstanding queries share the
                 same buffer pool. In addition, SOLE is presented as a
                 spatio-temporal join between two input streams, a
                 stream of spatio-temporal objects and a stream of
                 spatio-temporal queries. To cope with intervals of high
                 arrival rates of objects and/or queries, SOLE utilizes
                 a load-shedding approach where some of the stored
                 objects are dropped from memory. SOLE is implemented as
                 a pipelined query operator that can be combined with
                 traditional query operators in a query execution plan
                 to support a wide variety of continuous queries.
                 Performance experiments based on a real implementation
                 of SOLE inside a prototype of a data stream management
                 system show the scalability and efficiency of SOLE in
                 highly dynamic environments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
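
%%% The "significant objects only" buffering rule in miniature (regions
%%% and objects illustrative): an incoming object stays in the shared
%%% buffer only while some outstanding query's region contains it.
%%%
%%%    def significant(pos, queries):
%%%        x, y = pos
%%%        return any(x1 <= x <= x2 and y1 <= y <= y2
%%%                   for (x1, y1, x2, y2) in queries)
%%%
%%%    queries = [(0, 0, 10, 10), (20, 20, 30, 30)]
%%%    buffer_pool = {}
%%%    for oid, pos in [(1, (5, 5)), (2, (15, 15)), (3, (25, 25))]:
%%%        if significant(pos, queries):
%%%            buffer_pool[oid] = pos     # shared across all queries
%%%        else:
%%%            buffer_pool.pop(oid, None) # expire: no query needs it
%%%    print(sorted(buffer_pool))         # [1, 3]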

@Article{Pol:2008:MVL,
  author =       "Abhijit Pol and Christopher Jermaine and Subramanian
                 Arumugam",
  title =        "Maintaining very large random samples using the
                 geometric file",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "997--1018",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0048-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Random sampling is one of the most fundamental data
                 management tools available. However, most current
                 research involving sampling considers the problem of
                 how to use a sample, and not how to compute one. The
                  implicit assumption is that a `sample' is a small data
                 structure that is easily maintained as new data are
                 encountered, even though simple statistical arguments
                 demonstrate that very large samples of gigabytes or
                 terabytes in size can be necessary to provide high
                 accuracy. No existing work tackles the problem of
                 maintaining very large, disk-based samples from a data
                 management perspective, and no techniques now exist for
                 maintaining very large samples in an online manner from
                 streaming data. In this paper, we present online
                 algorithms for maintaining on-disk samples that are
                 gigabytes or terabytes in size. The algorithms are
                 designed for streaming data, or for any environment
                 where a large sample must be maintained online in a
                 single pass through a data set. The algorithms meet the
                 strict requirement that the sample always be a true,
                 statistically random sample (without replacement) of
                  all of the data processed thus far. We also present
                  algorithms to retrieve a small random sample from the
                  large disk-based sample, which may be used for various
                  purposes, including statistical analyses by a DBMS.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
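
%%% The single-pass requirement is the one that classic reservoir
%%% sampling (Algorithm R) already meets in memory; the paper's
%%% geometric file is, roughly, a disk-resident generalization of this
%%% baseline.  A sketch of the in-memory version:
%%%
%%%    import random
%%%
%%%    def reservoir(stream, n):
%%%        sample = []
%%%        for t, item in enumerate(stream):
%%%            if t < n:
%%%                sample.append(item)
%%%            else:
%%%                j = random.randrange(t + 1)  # keep w.p. n/(t+1)
%%%                if j < n:
%%%                    sample[j] = item
%%%        return sample
%%%
%%%    print(reservoir(range(10**6), 5))  # uniform, without replacement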

@Article{Abiteboul:2008:AXP,
  author =       "Serge Abiteboul and Omar Benjelloun and Tova Milo",
  title =        "The {Active XML} project: an overview",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1019--1040",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0049-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper provides an overview of the Active XML
                 project developed at INRIA over the past five years.
                 Active XML (AXML, for short), is a declarative
                 framework that harnesses Web services for distributed
                 data management, and is put to work in a peer-to-peer
                 architecture. The model is based on AXML documents,
                 which are XML documents that may contain embedded calls
                 to Web services, and on AXML services, which are Web
                 services capable of exchanging AXML documents. An AXML
                 peer is a repository of AXML documents that acts both
                 as a client by invoking the embedded service calls, and
                 as a server by providing AXML services, which are
                 generally defined as queries or updates over the
                 persistent AXML documents. The approach gracefully
                 combines stored information with data defined in an
                 intensional manner as well as dynamic information. This
                 simple, rather classical idea leads to a number of
                 technically challenging problems, both theoretical and
                 practical. In this paper, we describe and motivate the
                 AXML model and language, overview the research results
                 obtained in the course of the project, and show how all
                 the pieces come together in our implementation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data exchange; Intensional information; Web services;
                 XML",
}
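
%%% The document model can be mimicked in a few lines (tree shape and
%%% service registry are illustrative, not the AXML syntax): a node
%%% tagged as a service call is replaced, on materialization, by the
%%% data the service returns.
%%%
%%%    services = {"getTemp": lambda: [("temp", "21C")]}
%%%
%%%    def materialize(node):
%%%        tag, children = node
%%%        out = []
%%%        for child in children:
%%%            if child[0] == "call":     # embedded service call
%%%                out.extend(services[child[1]]())
%%%            elif isinstance(child[1], list):
%%%                out.append(materialize(child))
%%%            else:
%%%                out.append(child)
%%%        return (tag, out)
%%%
%%%    doc = ("city", [("name", "Paris"), ("call", "getTemp")])
%%%    print(materialize(doc))
%%%    # ('city', [('name', 'Paris'), ('temp', '21C')])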

@Article{Buccafurri:2008:EHT,
  author =       "Francesco Buccafurri and Gianluca Lax and Domenico
                 Sacc{\`a} and Luigi Pontieri and Domenico Rosaci",
  title =        "Enhancing histograms by tree-like bucket indices",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1041--1061",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0050-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Histograms are used to summarize the contents of
                 relations into a number of buckets for the estimation
                 of query result sizes. Several techniques have been
                 proposed in the past for determining bucket boundaries
                 which provide accurate estimations. However, while
                 search strategies for optimal bucket boundaries are
                 rather sophisticated, no much attention has been paid
                 for estimating queries inside buckets and all of the
                 above techniques adopt naive methods for such an
                 estimation. This paper focuses on the problem of
                 improving the estimation inside a bucket once its
                 boundaries have been fixed. The proposed technique is
                 based on the addition, to each bucket, of a memory-word
                 additional information (organized into a tree-like
                 index), storing approximate cumulative frequencies in a
                 hierarchical fashion. Both theoretical analysis and
                 experimental results show that the proposed approach
                 improves the accuracy of the estimation inside buckets,
                 w.r.t. both classical approaches (like continuous value
                 assumption and uniform spread assumption) and a number
                 of alternative ways to organize the additional
                  information. The index is then added to
                  state-of-the-art histograms, with the non-obvious
                  result that, despite the spatial overhead (which
                  reduces the number of allowed buckets once the storage
                  space has been fixed), the original methods are
                  strongly improved in terms of accuracy.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Approximate OLAP; Histograms; Range query estimation",
}
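
%%% The baseline being improved is easy to state in code (boundaries
%%% and frequencies illustrative): the continuous value assumption
%%% (CVA) spreads a bucket's total evenly over its range, while a
%%% tree-like index refines the estimate with cumulative frequencies
%%% of sub-ranges, here just the bucket's two halves.
%%%
%%%    def cva(bucket_sum, lo, hi, a, b):
%%%        # estimated frequency sum of [a, b] inside bucket [lo, hi]
%%%        a, b = max(a, lo), min(b, hi)
%%%        if b < a:
%%%            return 0.0
%%%        return bucket_sum * (b - a + 1) / (hi - lo + 1)
%%%
%%%    def indexed(half_sums, lo, hi, a, b):
%%%        mid = (lo + hi) // 2
%%%        return (cva(half_sums[0], lo, mid, a, b) +
%%%                cva(half_sums[1], mid + 1, hi, a, b))
%%%
%%%    print(cva(100, 1, 10, 1, 5))           # 50.0
%%%    print(indexed([80, 20], 1, 10, 1, 5))  # 80.0: most mass is low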

@Article{Kamra:2008:DAA,
  author =       "Ashish Kamra and Evimaria Terzi and Elisa Bertino",
  title =        "Detecting anomalous access patterns in relational
                 databases",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1063--1077",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0051-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A considerable effort has been recently devoted to the
                 development of Database Management Systems (DBMS) which
                 guarantee high assurance and security. An important
                 component of any strong security solution is
                 represented by Intrusion Detection (ID) techniques,
                 able to detect anomalous behavior of applications and
                 users. To date, however, there have been few ID
                 mechanisms proposed which are specifically tailored to
                 function within the DBMS. In this paper, we propose
                 such a mechanism. Our approach is based on mining SQL
                 queries stored in database audit log files. The result
                 of the mining process is used to form profiles that can
                 model normal database access behavior and identify
                 intruders. We consider two different scenarios while
                 addressing the problem. In the first case, we assume
                 that the database has a Role Based Access Control
                  (RBAC) model in place. Under an RBAC system, permissions
                 are associated with roles, grouping several users,
                 rather than with single users. Our ID system is able to
                  determine role intruders, that is, individuals who,
                  while holding a specific role, behave differently than
                  expected. An important advantage of providing an ID
                 technique specifically tailored to RBAC databases is
                 that it can help in protecting against insider threats.
                 Furthermore, the existence of roles makes our approach
                  usable even for databases with a large user population.
                 In the second scenario, we assume that there are no
                 roles associated with users of the database. In this
                 case, we look directly at the behavior of the users. We
                 employ clustering algorithms to form concise profiles
                 representing normal user behavior. For detection, we
                 either use these clustered profiles as the roles or
                 employ outlier detection techniques to identify
                 behavior that deviates from the profiles. Our
                 preliminary experimental evaluation on both real and
                 synthetic database traces shows that our methods work
                 well in practical situations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Anomaly detection; DBMS; Intrusion detection; RBAC;
                 User profiles",
}
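
%%% A toy rendition of the log-mining pipeline (signature granularity
%%% and log format are illustrative assumptions): summarize each query
%%% by a coarse signature, collect per-role profiles from the audit
%%% log, and flag a query whose signature was never seen for its role.
%%%
%%%    import re
%%%
%%%    def signature(sql):
%%%        cmd = sql.strip().split()[0].upper()
%%%        tabs = tuple(sorted(re.findall(
%%%            r"(?:FROM|INTO|UPDATE)\s+(\w+)", sql, re.I)))
%%%        return (cmd, tabs)
%%%
%%%    def profiles(log):                 # log: (role, sql) pairs
%%%        prof = {}
%%%        for role, sql in log:
%%%            prof.setdefault(role, set()).add(signature(sql))
%%%        return prof
%%%
%%%    prof = profiles([("clerk", "SELECT name FROM customers"),
%%%                     ("admin", "UPDATE accounts SET flag=1")])
%%%    q = "SELECT * FROM accounts"
%%%    print(signature(q) in prof["clerk"])  # False: anomalous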

@Article{Guha:2008:WSH,
  author =       "Sudipto Guha and Hyoungmin Park and Kyuseok Shim",
  title =        "Wavelet synopsis for hierarchical range queries with
                 workloads",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1079--1099",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0052-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Synopses structures and approximate query answering
                 have become increasingly important in DSS/ OLAP
                 applications with stringent response time requirements.
                 Range queries are an important class of problems in
                 this domain, and have a wide variety of applications
                 and have been studied in the context of histograms.
                 However, wavelets have been shown to be quite useful in
                 several scenarios and in fact their multi-resolution
                 structure makes them especially appealing for
                 hierarchical domains. Furthermore the fact that the
                 Haar wavelet basis has a linear time algorithm for the
                 computation of coefficients has made the Haar basis one
                 of the important and widely used synopsis structures.
                 Very recently optimal algorithms were proposed for the
                 wavelet synopsis construction problem for
                 equality/point queries. In this paper we investigate
                 the problem of optimum Haar wavelet synopsis
                 construction for range queries with workloads. We
                 provide optimum algorithms as well as approximation
                 heuristics and demonstrate the effectiveness of these
                 algorithms with our extensive experimental evaluation
                 using synthetic and real-life data sets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Approximate query answers; Data synopses; Query
                 processing; Wavelet decomposition",
}
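
%%% The plain Haar synopsis that workload-aware construction optimizes:
%%% unnormalized decomposition, keep the B largest coefficients, and
%%% reconstruct approximately (power-of-two input; data illustrative).
%%%
%%%    def haar(data):
%%%        coeffs, avgs = [], list(data)
%%%        while len(avgs) > 1:
%%%            pairs = list(zip(avgs[0::2], avgs[1::2]))
%%%            coeffs = [(a - b) / 2 for a, b in pairs] + coeffs
%%%            avgs = [(a + b) / 2 for a, b in pairs]
%%%        return avgs + coeffs           # [overall average, details]
%%%
%%%    def top_b(coeffs, B):
%%%        keep = set(sorted(range(len(coeffs)),
%%%                          key=lambda i: abs(coeffs[i]),
%%%                          reverse=True)[:B])
%%%        return [c if i in keep else 0.0 for i, c in enumerate(coeffs)]
%%%
%%%    def inverse(coeffs):
%%%        vals, k = coeffs[:1], 1
%%%        while k < len(coeffs):
%%%            vals = [v + s * d
%%%                    for v, d in zip(vals, coeffs[k:2 * k])
%%%                    for s in (1, -1)]
%%%            k *= 2
%%%        return vals
%%%
%%%    data = [2, 2, 0, 2, 3, 5, 4, 4]
%%%    assert inverse(haar(data)) == data    # exact with all terms
%%%    print(inverse(top_b(haar(data), 3)))  # lossy, 3 coefficients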

@Article{Deng:2008:MRS,
  author =       "Ke Deng and Xiaofang Zhou and Heng Tao Shen and Qing
                 Liu and Kai Xu and Xuemin Lin",
  title =        "A multi-resolution surface distance model for {$k$-NN}
                 query processing",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1101--1119",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0053-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A spatial k-NN query returns k nearest points in a
                 point dataset to a given query point. To measure the
                 distance between two points, most of the literature
                 focuses on the Euclidean distance or the network
                 distance. For many applications, such as wildlife
                 movement, it is necessary to consider the surface
                 distance, which is computed from the shortest path
                 along a terrain surface. In this paper, we investigate
                 the problem of efficient surface k-NN (sk-NN) query
                 processing. This is an important yet highly challenging
                 problem because the underlying environment data can be
                 very large and the computational cost of finding the
                 shortest path on a surface can be very high. To
                 minimize the amount of surface data to be used and the
                 cost of surface distance computation, a
                 multi-resolution surface distance model is proposed in
                 this paper to take advantage of monotonic distance
                 changes when the distances are computed at different
                 resolution levels. Based on this innovative model,
                 sk-NN queries can be processed efficiently by accessing
                 and processing surface data at a just-enough resolution
                 level within a just-enough search region. Our extensive
                 performance evaluations using real world datasets
                 confirm the efficiency of our proposed model.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
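
%%% The multi-resolution model above is, at heart, a filter-and-refine
%%% scheme: distances computed at coarser resolution levels bound the
%%% true surface distance, so candidates can be pruned cheaply and only
%%% the survivors pay for an exact surface-distance computation.  A
%%% hedged Python sketch of that generic pattern follows; coarse_lower
%%% and exact_surface_dist are hypothetical stand-ins, not the paper's
%%% actual distance model.
%%%
%%%     def surface_knn(query, points, k, coarse_lower, exact_surface_dist):
%%%         # Visit candidates in order of the cheap coarse lower bound.
%%%         candidates = sorted(points, key=lambda p: coarse_lower(query, p))
%%%         best = []  # (distance, point) pairs, sorted, at most k of them
%%%         for p in candidates:
%%%             if len(best) == k and coarse_lower(query, p) >= best[-1][0]:
%%%                 break  # no remaining candidate can improve the top-k
%%%             d = exact_surface_dist(query, p)
%%%             best = sorted(best + [(d, p)], key=lambda t: t[0])[:k]
%%%         return [p for _, p in best]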

@Article{Chuang:2008:PLR,
  author =       "Kun-Ta Chuang and Jiun-Long Huang and Ming-Syan
                 Chen",
  title =        "Power-law relationship and self-similarity in the
                 itemset support distribution: analysis and
                 applications",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1121--1141",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0054-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we identify and explore that the
                 power-law relationship and the self-similar phenomenon
                 appear in the itemset support distribution. The itemset
                 support distribution refers to the distribution of the
                 count of itemsets versus their supports. Exploring the
                 characteristics of these natural phenomena is useful to
                 many applications such as providing the direction of
                 tuning the performance of the frequent-itemset mining.
                 However, due to the explosive number of itemsets, it is
                 prohibitively expensive to retrieve lots of itemsets
                 before we identify the characteristics of the itemset
                 support distribution in targeted data. As such, we also
                 propose a valid and cost-effective algorithm, called
                 algorithm PPL, to extract characteristics of the
                 itemset support distribution. Furthermore, to fully
                 explore the advantages of our discovery, we also
                 propose novel mechanisms with the help of PPL to solve
                 two important problems: (1) determining a subtle
                 parameter for mining approximate frequent itemsets over
                 data streams; and (2) determining the sufficient sample
                 size for mining frequent patterns. As validated in our
                 experimental results, PPL can efficiently and precisely
                 identify the characteristics of the itemset support
                 distribution in various real data. In addition,
                  empirical studies also demonstrate that our mechanisms
                  for those two challenging problems are orders of
                  magnitude better than previous works, showing the
                  prominent advantage of PPL as an important
                  pre-processing step for mining applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
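
%%% The itemset support distribution above is the count of itemsets
%%% plotted against their support.  As a hedged illustration of the
%%% power-law claim, the Python sketch below fits
%%% count(s) ~ C * s**(-alpha) by least squares in log-log space; this
%%% is the generic diagnostic, not the paper's PPL algorithm.
%%%
%%%     import math
%%%
%%%     def fit_power_law(supports, counts):
%%%         """Fit counts ~ C * supports**(-alpha); return (C, alpha)."""
%%%         xs = [math.log(s) for s in supports]
%%%         ys = [math.log(c) for c in counts]
%%%         n = len(xs)
%%%         mx, my = sum(xs) / n, sum(ys) / n
%%%         slope = (sum((x - mx) * (y - my) for x, y in zip(xs, ys))
%%%                  / sum((x - mx) ** 2 for x in xs))
%%%         return math.exp(my - slope * mx), -slope
%%%
%%%     # fit_power_law([1, 2, 4, 8], [1000, 250, 62, 16]) -> alpha near 2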

@Article{Padmanabhan:2008:SDR,
  author =       "Prasanna Padmanabhan and Le Gruenwald and Anita Vallur
                 and Mohammed Atiquzzaman",
  title =        "A survey of data replication techniques for mobile ad
                 hoc network databases",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1143--1164",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0055-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A mobile ad hoc network (MANET) is a network that
                 allows mobile servers and clients to communicate in the
                 absence of a fixed infrastructure. MANET is a fast
                 growing area of research as it finds use in a variety
                 of applications. In order to facilitate efficient data
                 access and update, databases are deployed on MANETs.
                 These databases that operate on MANETs are referred to
                 as MANET databases. Since data availability in MANETs
                 is affected by the mobility and power constraints of
                 the servers and clients, data in MANETs are replicated.
                 A number of data replication techniques have been
                 proposed for MANET databases. This paper identifies
                 issues involved in MANET data replication and attempts
                 to classify existing MANET data replication techniques
                 based on the issues they address. The attributes of the
                 replication techniques are also tabulated to facilitate
                 a feature comparison of the existing MANET data
                 replication works. Parameters and performance metrics
                 are also presented to measure the performance of MANET
                 replication techniques. In addition, this paper also
                 proposes criteria for selecting appropriate data
                 replication techniques for various application
                 requirements. Finally, the paper concludes with a
                 discussion on future research directions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data replication; Mobile ad hoc network databases;
                 Mobile databases",
}

@Article{Zhong:2008:GPT,
  author =       "Sheng Zhong and Zhiqiang Yang",
  title =        "Guided perturbation: towards private and accurate
                 mining",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1165--1177",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0056-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "There have been two methods for privacy- preserving
                 data mining: the perturbation approach and the
                 cryptographic approach. The perturbation approach is
                 typically very efficient, but it suffers from a
                 tradeoff between accuracy and privacy. In contrast, the
                 cryptographic approach usually maintains accuracy, but
                 it is more expensive in computation and communication
                 overhead. We propose a novel perturbation method,
                 called guided perturbation. Specifically, we focus on a
                 central problem of privacy-preserving data mining--the
                 secure scalar product problem of vertically partitioned
                 data, and give a solution based on guided perturbation,
                  with a good, provable privacy guarantee. Our solution
                 achieves accuracy comparable to the cryptographic
                 solutions, while keeping the efficiency of perturbation
                 solutions. Our experimental results show that it can be
                 more than one hundred times faster than a typical
                 cryptographic solution.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
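
%%% The abstract above centers on the secure scalar product of
%%% vertically partitioned data.  For orientation, the Python sketch
%%% below shows the classic additive-perturbation baseline that such
%%% work refines: one party releases a noisy copy of its vector, and
%%% the noise cancels in expectation.  The paper's guided perturbation
%%% scheme is more sophisticated and is not reproduced here.
%%%
%%%     import random
%%%
%%%     def perturb(vec, sigma=1.0):
%%%         # Party A releases x_i + e_i with E[e_i] = 0.
%%%         return [v + random.gauss(0.0, sigma) for v in vec]
%%%
%%%     def estimated_dot(noisy_x, y):
%%%         # Party B computes sum((x_i + e_i) * y_i), whose expectation
%%%         # equals the true scalar product sum(x_i * y_i).
%%%         return sum(xn * yi for xn, yi in zip(noisy_x, y))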

@Article{Rizzolo:2008:TXM,
  author =       "Flavio Rizzolo and Alejandro A. Vaisman",
  title =        "Temporal {XML}: modeling, indexing, and query
                 processing",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1179--1212",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0058-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper we address the problem of modeling and
                 implementing temporal data in XML. We propose a data
                 model for tracking historical information in an XML
                 document and for recovering the state of the document
                 as of any given time. We study the temporal constraints
                 imposed by the data model, and present algorithms for
                 validating a temporal XML document against these
                 constraints, along with methods for fixing inconsistent
                 documents. In addition, we discuss different ways of
                 mapping the abstract representation into a temporal XML
                 document, and introduce TXPath, a temporal XML query
                 language that extends XPath 2.0. In the second part of
                 the paper, we present our approach for summarizing and
                 indexing temporal XML documents. In particular we show
                 that by indexing continuous paths, i.e., paths that are
                 valid continuously during a certain interval in a
                 temporal XML graph, we can dramatically increase query
                 performance. To achieve this, we introduce a new class
                 of summaries, denoted TSummary, that adds the time
                 dimension to the well-known path summarization schemes.
                 Within this framework, we present two new summaries:
                 LCP and Interval summaries. The indexing scheme,
                 denoted TempIndex, integrates these summaries with
                 additional data structures. We give a query processing
                 strategy based on TempIndex and a type of
                 ancestor-descendant encoding, denoted temporal interval
                 encoding. We present a persistent implementation of
                 TempIndex, and a comparison against a system based on a
                 non-temporal path index, and one based on DOM. Finally,
                 we sketch a language for updates, and show that the
                 cost of updating the index is compatible with
                 real-world requirements.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Semistructured data; Structural summaries; Temporal
                 databases; XML; XPath",
}

@Article{Jin:2008:SES,
  author =       "Liang Jin and Chen Li and Rares Vernica",
  title =        "{SEPIA}: estimating selectivities of approximate
                 string predicates in large databases",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1213--1229",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0061-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Many database applications have the emerging need to
                 support approximate queries that ask for strings that
                 are similar to a given string, such as 'name similar to
                 smith' and 'telephone number similar to 412-0964'.
                 Query optimization needs the selectivity of such an
                 approximate predicate, i.e., the fraction of records in
                 the database that satisfy the condition. In this paper,
                 we study the problem of estimating selectivities of
                 approximate string predicates. We develop a novel
                 technique, called Sepia, to solve the problem. Given a
                 bag of strings, our technique groups the strings into
                 clusters, builds a histogram structure for each
                 cluster, and constructs a global histogram. It is based
                 on the following intuition: given a query string $q$, a
                 preselected string $p$ in a cluster, and a string $s$
                 in the cluster, based on the proximity between $q$ and
                 $p$, and the proximity between $p$ and $s$, we can
                 obtain a probability distribution from a global
                 histogram about the similarity between $q$ and $s$. We
                 give a full specification of the technique using the
                 edit distance metric. We study challenges in adopting
                 this technique, including how to construct the
                 histogram structures, how to use them to do selectivity
                 estimation, and how to alleviate the effect of
                 non-uniform errors in the estimation. We discuss how to
                 extend the techniques to other similarity functions.
                 Our extensive experiments on real data sets show that
                 this technique can accurately estimate selectivities of
                 approximate string predicates.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Approximate; Estimation; Selectivity; SEPIA; String",
}
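
%%% The clustering intuition quoted above works because edit distance
%%% is a metric: d(q,p) and d(p,s) bracket the unknown d(q,s) by the
%%% triangle inequality, so (d(q,p), d(p,s)) pairs can index a
%%% histogram of observed d(q,s) values.  A hedged Python sketch of
%%% just those bounds; Sepia's actual histogram construction and
%%% estimation steps are in the paper.
%%%
%%%     def edit_distance(a, b):
%%%         prev = list(range(len(b) + 1))
%%%         for i, ca in enumerate(a, 1):
%%%             cur = [i]
%%%             for j, cb in enumerate(b, 1):
%%%                 cur.append(min(prev[j] + 1,       # deletion
%%%                                cur[j - 1] + 1,    # insertion
%%%                                prev[j - 1] + (ca != cb)))
%%%             prev = cur
%%%         return prev[-1]
%%%
%%%     def pivot_bounds(q, p, s):
%%%         dqp, dps = edit_distance(q, p), edit_distance(p, s)
%%%         return abs(dqp - dps), dqp + dps  # bounds on d(q, s)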

@Article{Venkateswaran:2008:RBI,
  author =       "Jayendra Venkateswaran and Tamer Kahveci and
                 Christopher Jermaine and Deepak Lachwani",
  title =        "Reference-based indexing for metric spaces with costly
                 distance measures",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1231--1251",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0062-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We consider the problem of similarity search in
                 databases with costly metric distance measures. Given
                 limited main memory, our goal is to develop a
                 reference-based index that reduces the number of
                 comparisons in order to answer a query. The idea in
                 reference-based indexing is to select a small set of
                 reference objects that serve as a surrogate for the
                 other objects in the database. We consider novel
                 strategies for selection of references and assigning
                 references to database objects. For dynamic databases
                 with frequent updates, we propose two incremental
                 versions of the selection algorithm. Our experimental
                 results show that our selection and assignment methods
                 far outperform competing methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Earth mover's distance; Edit distance; Metric
                 measures; Reference-indexing",
}
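
%%% Reference-based indexing as described above precomputes d(o, r)
%%% for every object o and reference r; at query time,
%%% |d(q,r) - d(o,r)| lower-bounds d(q,o) by the triangle inequality,
%%% so most invocations of the costly metric are skipped.  A hedged
%%% Python sketch of that pruning step (reference selection, the
%%% paper's main contribution, is not shown):
%%%
%%%     def range_query(query, objects, references, ref_dists, dist, radius):
%%%         # ref_dists[i][j] == dist(objects[i], references[j]), precomputed.
%%%         q_to_ref = [dist(query, r) for r in references]
%%%         hits = []
%%%         for i, obj in enumerate(objects):
%%%             lower = max(abs(qr - ref_dists[i][j])
%%%                         for j, qr in enumerate(q_to_ref))
%%%             if lower > radius:
%%%                 continue  # pruned without evaluating dist(query, obj)
%%%             if dist(query, obj) <= radius:  # costly check for survivors
%%%                 hits.append(obj)
%%%         return hits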

@Article{Tao:2008:PDW,
  author =       "Yufei Tao and Xiaokui Xiao",
  title =        "Primal or dual: which promises faster spatiotemporal
                 search?",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1253--1270",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0064-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The existing predictive spatiotemporal indexes can be
                 classified into two categories, depending on whether
                 they are based on the primal or dual methodology.
                 Although we have gained considerable empirical
                 knowledge about various access methods, currently there
                  is only limited understanding of the theoretical
                 characteristics of the two methodologies. In fact, the
                 experimental results in different papers even
                 contradict each other, regarding the relative
                 superiority of the primal and dual techniques. This
                 paper presents a careful study on the query performance
                 of general primal and dual indexes, and reveals
                 important insight into the behavior of each technique.
                 In particular, we mathematically establish the
                 conditions that determine the superiority of each
                 methodology, and provide rigorous justification for
                 well-known observations that have not been properly
                 explained in the literature. Our analytical findings
                 also resolve the contradiction in the experiments of
                 previous work.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Moving objects; Range search; Spatial database;
                 Theory",
}

@Article{Tao:2008:ETC,
  author =       "Yufei Tao and Xiaokui Xiao",
  title =        "Efficient temporal counting with bounded error",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1271--1292",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0066-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper studies aggregate search in transaction
                 time databases. Specifically, each object in such a
                 database can be modeled as a horizontal segment, whose
                 $y$-projection is its search key, and its
                 $x$-projection represents the period when the key was
                 valid in history. Given a query timestamp $q_t$ and a
                  key range $q_k$, a count-query retrieves the number of
                  objects that are alive at $q_t$, and their keys fall
                  in $q_k$. We provide a method that accurately answers
                  such queries, with error less than
                  $\frac{1}{\varepsilon} + \varepsilon \cdot N_{\rm
                  alive}(q_t)$, where $N_{\rm alive}(q_t)$ is the number
                  of objects alive at time $q_t$, and $\varepsilon$ is
                  any constant in $(0, 1]$. Denoting the disk page size
                  as $B$, and $n = \varepsilon N / B$, our technique
                  requires $O(n)$ space,
                 processes any query in $O(\log_B n)$ time, and supports
                 each update in $O(\log_B n)$ amortized I/Os. As
                 demonstrated by extensive experiments, the proposed
                 solutions guarantee query results with extremely high
                 precision (median relative error below 5\%), while
                 consuming only a fraction of the space occupied by the
                 existing approaches that promise precise results.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Aggregate search; Approximate query processing;
                 Temporal database",
}
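
%%% Reading the error bound above with the single accuracy parameter
%%% $\varepsilon$ (the reconstruction used in the abstract), a short
%%% worked consequence in LaTeX: the two terms trade off, and the
%%% bound is smallest where they balance,
%%%
%%%     \min_{\varepsilon \in (0,1]}
%%%       \left( \frac{1}{\varepsilon}
%%%              + \varepsilon \cdot N_{\rm alive}(q_t) \right)
%%%     \quad\text{at}\quad
%%%     \varepsilon = \frac{1}{\sqrt{N_{\rm alive}(q_t)}},
%%%     \quad\text{giving error} < 2\sqrt{N_{\rm alive}(q_t)},
%%%
%%% valid once $N_{\rm alive}(q_t) \ge 1$; under the same reading,
%%% smaller $\varepsilon$ also shrinks $n = \varepsilon N / B$ and
%%% hence the $O(n)$ space bound.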

@Article{Islam:2008:ACB,
  author =       "Aminul Islam and Diana Inkpen and Iluju Kiringa",
  title =        "Applications of corpus-based semantic similarity and
                 word segmentation to database schema matching",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1293--1320",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0067-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we present a method for database schema
                 matching: the problem of identifying elements of two
                 given schemas that correspond to each other. Schema
                 matching is useful in e-commerce exchanges, in data
                 integration/warehousing, and in semantic web
                 applications. We first present two corpus-based
                 methods: one method is for determining the semantic
                 similarity of two target words and the other is for
                 automatic word segmentation. Then we present a
                 name-based element-level database schema matching
                 method that exploits both the semantic similarity and
                 the word segmentation methods. Our word similarity
                 method uses pointwise mutual information (PMI) to sort
                 lists of important neighbor words of two target words;
                 the words which are common in both lists are selected
                 and their PMI values are aggregated to calculate the
                 relative similarity score. Our word segmentation method
                 uses corpus type frequency information to choose the
                 type with maximum length and frequency from
                 'desegmented' text. It also uses a modified
                  forward--backward matching technique using maximum
                 length frequency and entropy rate if any non-matching
                 portions of the text exist. Finally, we exploit both
                 the semantic similarity and the word segmentation
                 methods in our proposed name-based element-level schema
                 matching method. This method uses a single property
                 (i.e., element name) for schema matching and
                 nevertheless achieves a measure score that is
                 comparable to the methods that use multiple properties
                 (e.g., element name, text description, data instance,
                 context description). Our schema matching method also
                 uses normalized and modified versions of the longest
                 common subsequence string matching algorithm with
                 weight factors to allow for a balanced combination. We
                 validate our methods with experimental studies, the
                 results of which suggest that these methods can be a
                 useful addition to the set of existing methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Corpus-based methods; Database schema matching;
                 Semantic similarity; Word segmentation",
}
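
%%% The word-similarity component above is built on pointwise mutual
%%% information, PMI(x, y) = log(p(x, y) / (p(x) p(y))).  A hedged
%%% Python sketch from raw corpus counts follows; neighbor ranking and
%%% aggregation into the relative similarity score follow the paper,
%%% not this sketch.
%%%
%%%     import math
%%%
%%%     def pmi(x, y, unigram, cooc, total):
%%%         """unigram: word -> count; cooc: (word, word) -> count;
%%%         total: number of observations behind those counts."""
%%%         p_x, p_y = unigram[x] / total, unigram[y] / total
%%%         p_xy = cooc.get((x, y), 0) / total
%%%         return math.log(p_xy / (p_x * p_y)) if p_xy else float('-inf')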

@Article{Chuang:2008:MTK,
  author =       "Kun-Ta Chuang and Jiun-Long Huang and Ming-Syan
                 Chen",
  title =        "Mining top-k frequent patterns in the presence of the
                 memory constraint",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "5",
  pages =        "1321--1344",
  month =        aug,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0078-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 10 10:00:50 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We explore in this paper a practicably interesting
                 mining task to retrieve top-$k$ (closed) itemsets in
                 the presence of the memory constraint. Specifically, as
                 opposed to most previous works that concentrate on
                 improving the mining efficiency or on reducing the
                 memory size by best effort, we first attempt to specify
                 the available upper memory size that can be utilized by
                 mining frequent itemsets. To comply with the upper
                 bound of the memory consumption, two efficient
                 algorithms, called MTK and MTK\_Close, are devised for
                 mining frequent itemsets and closed itemsets,
                 respectively, without specifying the subtle minimum
                 support. Instead, users only need to give a more
                 human-understandable parameter, namely the desired
                 number of frequent (closed) itemsets $k$. In practice,
                 it is quite challenging to constrain the memory
                 consumption while also efficiently retrieving top-$k$
                 itemsets. To effectively achieve this, MTK and
                 MTK\_Close are devised as level-wise search algorithms,
                 where the number of candidates being
                 generated-and-tested in each database scan will be
                  limited. A novel search approach, called $\delta$-stair
                  search, is utilized in MTK and MTK\_Close to
                 effectively assign the available memory for testing
                 candidate itemsets with various itemset-lengths, which
                 leads to a small number of required database scans. As
                 demonstrated in the empirical study on real data and
                 synthetic data, instead of only providing the
                 flexibility of striking a compromise between the
                 execution efficiency and the memory consumption, MTK
                 and MTK\_Close can both achieve high efficiency and
                 have a constrained memory bound, showing the prominent
                 advantage to be practical algorithms of mining frequent
                 patterns.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
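
%%% The memory-bounded, level-wise idea above (cap how many candidates
%%% are generated-and-tested per database scan) trades extra scans for
%%% a hard memory bound.  A hedged Python sketch of one such counting
%%% pass; this shows the generic trade-off only, not the MTK or
%%% MTK\_Close algorithms.
%%%
%%%     def bounded_counting(db, candidates, max_in_memory):
%%%         """Count candidate itemsets (tuples) while holding at most
%%%         max_in_memory counters; each batch costs one scan of db."""
%%%         counts = {}
%%%         for start in range(0, len(candidates), max_in_memory):
%%%             batch = candidates[start:start + max_in_memory]
%%%             tallies = {c: 0 for c in batch}
%%%             for transaction in db:  # one full scan per batch
%%%                 items = set(transaction)
%%%                 for c in batch:
%%%                     if set(c) <= items:
%%%                         tallies[c] += 1
%%%             counts.update(tallies)
%%%         return counts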

@Article{Catarci:2008:GES,
  author =       "Tiziana Catarci and Ren{\'e} J. Miller",
  title =        "Guest editorial: special issue on metadata
                 management",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "6",
  pages =        "1345--1346",
  month =        nov,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0112-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 22 09:20:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Atzeni:2008:MIS,
  author =       "Paolo Atzeni and Paolo Cappellari and Riccardo Torlone
                 and Philip A. Bernstein and Giorgio Gianforme",
  title =        "Model-independent schema translation",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "6",
  pages =        "1347--1370",
  month =        nov,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0105-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 22 09:20:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We discuss a proposal for the implementation of the
                 model management operator ModelGen, which translates
                 schemas from one model to another, for example from
                 object-oriented to SQL or from SQL to XML schema
                 descriptions. The operator can be used to generate
                 database wrappers (e.g., object-oriented or XML to
                 relational), default user interfaces (e.g., relational
                 to forms), or default database schemas from other
                  representations. The approach translates schemas from
                  one model to another, within a predefined, but large
                  and extensible, set of models: given a source schema $S$
                 expressed in a source model, and a target model TM, it
                 generates a schema $S'$ expressed in TM that is
                 'equivalent' to $S$. A wide family of models is handled
                 by using a metamodel in which models can be succinctly
                 and precisely described. The approach expresses the
                 translation as Datalog rules and exposes the source and
                 target of the translation in a generic relational
                 dictionary. This makes the translation transparent,
                 easy to customize and model-independent. The proposal
                 includes automatic generation of translations as
                 composition of basic steps.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data models; Model management; Schema translation",
}

@Article{Cudre-Mauroux:2008:PMM,
  author =       "Philippe Cudr{\'e}-Mauroux and Adriana Budura and
                 Manfred Hauswirth and Karl Aberer",
  title =        "{PicShark}: mitigating metadata scarcity through
                 large-scale {P2P} collaboration",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "6",
  pages =        "1371--1384",
  month =        nov,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0103-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 22 09:20:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the commoditization of digital devices, personal
                 information and media sharing is becoming a key
                 application on the pervasive Web. In such a context,
                 data annotation rather than data production is the main
                 bottleneck. Metadata scarcity represents a major
                 obstacle preventing efficient information processing in
                 large and heterogeneous communities. However, social
                 communities also open the door to new possibilities for
                 addressing local metadata scarcity by taking advantage
                 of global collections of resources. We propose to
                 tackle the lack of metadata in large-scale distributed
                  systems through a collaborative process leveraging
                 both content and metadata. We develop a community-based
                 and self-organizing system called PicShark in which
                 information entropy--in terms of missing metadata--is
                 gradually alleviated through decentralized instance and
                 schema matching. Our approach focuses on
                 semi-structured metadata and confines computationally
                 expensive operations to the edge of the network, while
                 keeping distributed operations as simple as possible to
                 ensure scalability. PicShark builds on structured
                 Peer-to-Peer networks for distributed look-up
                 operations, but extends the application of
                 self-organization principles to the propagation of
                 metadata and the creation of schema mappings. We
                 demonstrate the practical applicability of our method
                 in an image sharing scenario and provide experimental
                  evidence illustrating the validity of our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Metadata entropy; Metadata heterogeneity; Metadata
                 scarcity; Peer data management; Peer-to-Peer
                 collaboration",
}

@Article{Cruz:2008:LFS,
  author =       "Isabel F. Cruz and Huiyong Xiao",
  title =        "A layered framework supporting personal information
                 integration and application design for the semantic
                 desktop",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "6",
  pages =        "1385--1406",
  month =        nov,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0102-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 22 09:20:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the development of inexpensive storage devices,
                 space usage is no longer a bottleneck for computer
                 users. However, the increasingly large amount of
                 personal information poses a critical problem to those
                 users: traditional file organization in hierarchical
                 directories may not be suited to the effective
                 management of personal information because it ignores
                 the semantic associations therein and bears no
                 connection with the applications that users will run.
                 To address such limitations, we present our vision of a
                 semantic desktop, which relies on the use of ontologies
                 to annotate and organize data and on the concept of
                 personal information application (PIA), which is
                 associated with a user's task. The PIA designer is the
                 tool that is provided for building a variety of PIAs
                 consisting of views (e.g., text, list, table, graph),
                 which are spatially arranged and display interrelated
                 fragments of the overall personal information. The
                 semantic organization of the data follows a layered
                 architecture that models separately the personal
                 information, the domain data, and the application data.
                 The network of concepts that ensues from extensive
                 annotation and explicit associations lends itself well
                 to rich browsing capabilities and to the formulation of
                 expressive database-like queries. These queries are
                 also the basis for the interaction among views of the
                 PIAs in the same desktop or in networked desktops. In
                 the latter case, the concept of desktop service
                 provides for a semantic platform for the integration of
                 information across different desktops and the web. In
                 this paper, we present in detail the semantic
                 organization of the information, the overall system
                 architecture and implementation aspects, queries and
                 their processing, PIAs and the PIA designer, including
                 usability studies on the designer, and the concepts of
                 semantic navigation in a desktop and of interoperation
                 in a network of desktops.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Candan:2008:SSE,
  author =       "K. Sel{\c{c}}uk Candan and Huiping Cao and Yan Qi and
                 Maria Luisa Sapino",
  title =        "System support for exploration and expert feedback in
                 resolving conflicts during integration of metadata",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "6",
  pages =        "1407--1444",
  month =        nov,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0109-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 22 09:20:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A critical reality in integration is that knowledge
                 obtained from different sources may often be
                 conflicting. Conflict-resolution, whether performed
                 during the design phase or during run-time, can be
                 costly and, if done without a proper understanding of
                 the usage context, can be ineffective. In this paper,
                 we propose a novel exploration and feedback-based
                  approach, FICSR (pronounced 'fixer'), to
                 conflict-resolution when integrating metadata from
                 different sources. Rather than relying on purely
                 automated conflict-resolution mechanisms, FICSR brings
                  the domain expert into the conflict-resolution process
                 and informs the integration based on the expert's
                  feedback. In particular, instead of relying on the
                  traditional model-based definition of consistency
                 (which, whenever there are conflicts, picks a possible
                 world among many), we introduce a ranked interpretation
                 of the metadata and statements about the metadata. This
                 not only enables FICSR to avoid committing to an
                 interpretation too early, but also helps in achieving a
                 more direct correspondence between the experts'
                 (subjective) interpretation of the data and the
                 system's (objective) treatment of the available
                 alternatives. Consequently, the ranked interpretation
                 leads to new opportunities for exploratory feedback for
                 conflict-resolution: within the context of a given
                 statement of interest, (a) a preliminary ranking of
                 candidate matches, representing different resolutions
                 of the conflicts, informs the user about the
                 alternative interpretations of the metadata, while (b)
                 user feedback regarding the preferences among
                 alternatives is exploited to inform the system about
                 the expert's relevant domain knowledge. The expert's
                 feedback, then, is used for resolving not only the
                 conflicts among different sources, but also possible
                 mis-alignments due to the initial matching phase. To
                  enable this (system
                  $\stackrel{\rm informs}{\longleftrightarrow}$ user)
                 feedback process, we develop data structures and
                 algorithms for efficient off-line conflict/agreement
                 analysis of the integrated metadata. We also develop
                 algorithms for efficient on-line query processing,
                 candidate result enumeration, validity analysis, and
                 system feedback. The results are brought together and
                 evaluated in the Feedback-based InConSistency
                 Resolution (FICSR) system.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Exploration of alternatives; Feedback-based
                 conflict-resolution; Metadata integration; System
                 feedback; Taxonomy; User feedback",
}

@Article{Wang:2008:AXB,
  author =       "Fusheng Wang and Carlo Zaniolo and Xin Zhou",
  title =        "{ArchIS}: an {XML}-based approach to transaction-time
                 temporal database systems",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "6",
  pages =        "1445--1463",
  month =        nov,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0086-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 22 09:20:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Effective support for temporal applications by
                 database systems represents an important technical
                 objective that is difficult to achieve since it
                 requires an integrated solution for several problems,
                 including (i) expressive temporal representations and
                 data models, (ii) powerful languages for temporal
                 queries and snapshot queries, (iii) indexing,
                 clustering and query optimization techniques for
                 managing temporal information efficiently, and (iv)
                 architectures that bring together the different pieces
                 of enabling technology into a robust system. In this
                 paper, we present the ArchIS system that achieves these
                 objectives by supporting a temporally grouped data
                 model on top of RDBMS. ArchIS' architecture uses (a)
                 XML to support temporally grouped (virtual)
                 representations of the database history, (b) XQuery to
                 express powerful temporal queries on such views, (c)
                 temporal clustering and indexing techniques for
                 managing the actual historical data in a relational
                 database, and (d) SQL/XML for executing the queries on
                 the XML views as equivalent queries on the relational
                 database. The performance studies presented in the
                  paper show that ArchIS is quite effective at storing
                  the transaction-time history of relational databases
                  and retrieving it under complex query conditions, and
                  can also assure excellent storage efficiency by
                 providing compression as an option. This approach
                 achieves full-functionality transaction-time databases
                 without requiring temporal extensions in XML or
                 database standards, and provides critical support to
                 emerging application areas such as RFID.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Temporal database; Temporal grouping; Temporal query;
                 XML database; XQuery",
}

@Article{Zhou:2008:DSD,
  author =       "Yongluan Zhou and Beng Chin Ooi and Kian-Lee Tan",
  title =        "Disseminating streaming data in a dynamic environment:
                 an adaptive and cost-based approach",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "6",
  pages =        "1465--1483",
  month =        nov,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0077-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 22 09:20:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In a distributed stream processing system, streaming
                 data are continuously disseminated from the sources to
                 the distributed processing servers. To enhance the
                 dissemination efficiency, these servers are typically
                 organized into one or more dissemination trees. In this
                 paper, we focus on the problem of constructing
                 dissemination trees to minimize the average loss of
                 fidelity of the system. We observe that existing
                 heuristic-based approaches can only explore a limited
                 solution space and hence may lead to sub-optimal
                  solutions. In contrast, we propose an adaptive and
                 cost-based approach. Our cost model takes into account
                 both the processing cost and the communication cost.
                 Furthermore, as a distributed stream processing system
                 is vulnerable to inaccurate statistics, runtime
                 fluctuations of data characteristics, server workloads,
                 and network conditions, we have designed our scheme to
                 be adaptive to these situations: an operational
                 dissemination tree may be incrementally transformed to
                 a more cost-effective one. Our adaptive strategy
                 employs distributed decisions made by the distributed
                 servers independently based on localized statistics
                 collected by each server at runtime. For a relatively
                 static environment, we also propose two static tree
                 construction algorithms relying on a priori system
                 statistics. These static trees can also be used as
                 initial trees in a dynamic environment. We apply our
                 schemes to both single- and multi-object dissemination.
                 Our extensive performance study shows that the adaptive
                 mechanisms are effective in a dynamic context and the
                 proposed static tree construction algorithms perform
                 close to optimal in a static environment.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Dissemination trees; Distributed stream processing;
                 Streaming data dissemination",
}

@Article{Kim:2008:SOF,
  author =       "Min-Soo Kim and Kyu-Young Whang and Jae-Gil Lee and
                 Min-Jae Lee",
  title =        "Structural optimization of a full-text $n$-gram index
                 using relational normalization",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "6",
  pages =        "1485--1507",
  month =        nov,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0082-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 22 09:20:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "As the amount of text data grows explosively, an
                 efficient index structure for large text databases
                 becomes ever important. The $n$-gram inverted index
                 (simply, the $n$-gram index) has been widely used in
                 information retrieval or in approximate string matching
                  due to its two major advantages: it is language-neutral
                  and error-tolerant. Nevertheless, the $n$-gram index
                  also has drawbacks: its size tends to be very large,
                  and the performance of queries tends to be poor. In
                  this paper,
                 we propose the two-level $n$-gram inverted index
                 (simply, the $n$-gram/2L index) that significantly
                 reduces the size and improves the query performance by
                 using the relational normalization theory. We first
                 identify that, in the (full-text) $n$-gram index, there
                 exists redundancy in the position information caused by
                 a non-trivial multivalued dependency. The proposed
                 index eliminates such redundancy by constructing the
                 index in two levels: the front-end index and the
                 back-end index. We formally prove that this two-level
                 construction is identical to the relational
                 normalization process. We call this process structural
                 optimization of the $n$-gram index. The $n$-gram/2L
                 index has excellent properties: (1) it significantly
                 reduces the size and improves the performance compared
                 with the $n$-gram index with these improvements
                 becoming more marked as the database size gets larger;
                 (2) the query processing time increases only very
                 slightly as the query length gets longer. Experimental
                  results using real databases of 1~GB show that the size
                  of the $n$-gram/2L index is reduced by up to 1.9--2.4
                  times and, at the same time, the query performance is
                  improved by up to 13.1 times compared with those of the
                  $n$-gram index. We also compare the $n$-gram/2L index
                  with M{\"a}kinen's compact suffix array (CSA) (Proc. 11th
                  Annual Symposium on Combinatorial Pattern Matching,
                  pp.~305--319, 2000) stored on disk. Experimental
                  results show that the $n$-gram/2L index outperforms the
                  CSA when the query length is short (i.e., less than
                  15--20), and the CSA is similar to or better than the
                  $n$-gram/2L index when the query length is long (i.e.,
                  more than 15--20).",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "$n$-gram; Inverted index; Multivalued dependency; Text
                 search",
}
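
%%% The two-level construction above can be pictured as: cut each
%%% document into fixed-length subsequences overlapping by n - 1
%%% characters (so no n-gram is lost at a boundary); the front-end
%%% index maps n-grams to subsequences, and the back-end index maps
%%% subsequences to documents.  A hedged Python sketch of that shape;
%%% the real index keeps position offsets and is derived via relational
%%% normalization, which this toy omits.
%%%
%%%     from collections import defaultdict
%%%
%%%     def build_two_level(docs, m=4, n=2):
%%%         front, back = defaultdict(set), defaultdict(set)
%%%         for doc_id, text in docs.items():
%%%             for start in range(0, len(text), m - (n - 1)):
%%%                 sub = text[start:start + m]  # an m-subsequence
%%%                 back[sub].add(doc_id)
%%%                 for i in range(len(sub) - n + 1):
%%%                     front[sub[i:i + n]].add(sub)
%%%         return front, back
%%%
%%%     def docs_with_gram(front, back, gram):
%%%         return {d for sub in front.get(gram, ()) for d in back[sub]}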

@Article{Guha:2008:STO,
  author =       "Sudipto Guha",
  title =        "On the space--time of optimal, approximate and
                 streaming algorithms for synopsis construction
                 problems",
  journal =      j-VLDB-J,
  volume =       "17",
  number =       "6",
  pages =        "1509--1535",
  month =        nov,
  year =         "2008",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0083-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 22 09:20:08 MDT 2008",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Synopses construction algorithms have been found to be
                 of interest in query optimization, approximate query
                 answering and mining, and over the last few years
                 several good synopsis construction algorithms have been
                 proposed. These algorithms have mostly focused on the
                  running time of the synopsis construction vis-{\`a}-vis the
                 synopsis quality. However the space complexity of
                 synopsis construction algorithms has not been
                 investigated as thoroughly. Many of the optimum
                 synopsis construction algorithms are expensive in
                 space. For some of these algorithms the space required
                 to construct the synopsis is significantly larger than
                 the space required to store the input. These algorithms
                 rely on the fact that they require a smaller 'working
                 space' and most of the data can be resident on disc.
                 The large space complexity of synopsis construction
                 algorithms is a handicap in several scenarios. In the
                 case of streaming algorithms, space is a fundamental
                  constraint. In the case of offline optimal or approximate
                 algorithms, a better space complexity often makes these
                 algorithms much more attractive by allowing them to run
                 in main memory and not use disc, or alternately allows
                 us to scale to significantly larger problems without
                 running out of space. In this paper, we propose a
                 simple and general technique that reduces space
                 complexity of synopsis construction algorithms. As a
                 consequence we show that the notion of 'working space'
                 proposed in these contexts is redundant. This technique
                 can be easily applied to many existing algorithms for
                 synopsis construction problems. We demonstrate the
                 performance benefits of our proposal through
                 experiments on real-life and synthetic data. We believe
                 that our algorithm also generalizes to a broader range
                 of dynamic programs beyond synopsis construction.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lakhal:2009:FFE,
  author =       "Neila Ben Lakhal and Takashi Kobayashi and Haruo
                 Yokota",
  title =        "{FENECIA}: failure endurable nested-transaction based
                 execution of composite {Web} services with incorporated
                 state analysis",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "1--56",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0076-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Interest in the Web services (WS) composition (WSC)
                 paradigm is increasing tremendously. A real shift in
                 distributed computing history is expected to occur when
                 the dream of implementing Service-Oriented Architecture
                 (SOA) is realized. However, there is a long way to go
                 to achieve such an ambitious goal. In this paper, we
                  support the idea that, when tackling the WSC issue,
                 the earlier that the inevitability of failures is
                 recognized and proper failure-handling mechanisms are
                 defined, from the very early stage of the composite WS
                 (CWS) specification, the greater are the chances of
                 achieving a significant gain in dependability. To
                 formalize this vision, we present the FENECIA (Failure
                 Endurable Nested-transaction based Execution of
                 Composite Web services with Incorporated state
                 Analysis) framework. Our framework approaches the WSC
                 issue from different points of view to guarantee a high
                 level of dependability. In particular, it aims at being
                 simultaneously a failure-handling-devoted CWS
                 specification, execution, and quality of service (QoS)
                 assessment approach. In the first section of our
                 framework, we focus on answering the need for a
                 specification model tailored for the WS architecture.
                 To this end, we introduce WS-SAGAS, a new transaction
                 model. WS-SAGAS introduces key concepts that are not
                 part of the WS architecture pillars, namely, arbitrary
                 nesting, state, vitality degree, and compensation, to
                 specify failure-endurable CWS as a hierarchy of
                 recursively nested transactions. In addition, to define
                 the CWS execution semantics, without suffering from the
                 hindrance of an XML-based notation, we describe a
                 textual notation that describes a WSC in terms of
                 definition rules, composability rules, and ordering
                 rules, and we introduce graphical and formal notations.
                 These rules provide the solid foundation needed to
                 formulate the execution semantics of a CWS in terms of
                 execution correctness verification dependencies. To
                 ensure dependable execution of the CWS, we present in
                 the second section of FENECIA our architecture THROWS,
                 in which the execution control of the resulting CWS is
                 distributed among engines, discovered dynamically, that
                 communicate in a peer-to-peer fashion. A dependable
                 execution is guaranteed in THROWS by keeping track of
                 the execution progress of a CWS and by enforcing
                 forward and backward recovery. We concentrate in the
                  third section of our approach on showing how
                  considering failures is crucial to acquiring more
                  accurate CWS QoS estimations. We propose a model that
                 assesses several QoS properties of CWS, which are
                 specified as WS-SAGAS transactions and executed in
                 THROWS. We validate our proposal and show its
                 feasibility and broad applicability by describing an
                 implemented prototype and a case study.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Composition; Dependability; Distributed execution;
                 Failure; QoS; Transaction model; Web services",
}

@Article{Sharifzadeh:2009:AVC,
  author =       "Mehdi Sharifzadeh and Cyrus Shahabi",
  title =        "Approximate {Voronoi} cell computation on spatial data
                 streams",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "57--75",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0081-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Several studies have exploited the properties of
                 Voronoi diagrams to improve the efficiency of
                 variations of the nearest neighbor search on stored
                 datasets. However, the significance of Voronoi diagrams
                 and their basic building blocks, Voronoi cells, has
                 been neglected when the geometry data is incrementally
                 becoming available as a data stream. In this paper, we
                 study the problem of Voronoi cell computation for fixed
                 2-d site points when the locations of the neighboring
                 sites arrive as a spatial data stream. We show that the
                 non-streaming solution to the problem does not meet the
                 memory requirements of many realistic scenarios over a
                 sliding window. Hence, we propose AVC-SW, an
                  approximate streaming algorithm that computes $(1 +
                  \epsilon)$-approximations to the exact Voronoi cell
                  in $O(k)$, where $k$ is its sample size.
                 With the sliding window model and random arrival of
                 points, we show both analytically and experimentally
                 that for given window size $w$ and parameter $k$,
                 AVC-SW reduces the expected memory requirements of the
                 classic algorithm from $O(w)$ to $O(k \log (\frac{w}{k}
                 + 1))$ regardless of the distribution of the points in
                 the 2-d space. This is a significant improvement for
                 most of the real-world scenarios where $w \gg k$.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Approximation; Sliding window; Spatial data stream;
                 Voronoi cell",
}
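
%%% The AVC-SW entry above approximates a Voronoi cell from a sample of
%%% neighboring sites.  As background, a minimal Python sketch of the
%%% exact computation it approximates: the cell of a site is the
%%% intersection of halfplanes bounded by perpendicular bisectors.  The
%%% function names are ours, not the paper's.
%%%
%%%    def clip(poly, a, b, c):
%%%        # Keep the part of convex polygon `poly` where a*x + b*y <= c
%%%        # (Sutherland--Hodgman clipping against one halfplane).
%%%        out = []
%%%        for i in range(len(poly)):
%%%            (x1, y1), (x2, y2) = poly[i], poly[(i + 1) % len(poly)]
%%%            in1, in2 = a * x1 + b * y1 <= c, a * x2 + b * y2 <= c
%%%            if in1:
%%%                out.append((x1, y1))
%%%            if in1 != in2:
%%%                t = (c - a * x1 - b * y1) / (a * (x2 - x1) + b * (y2 - y1))
%%%                out.append((x1 + t * (x2 - x1), y1 + t * (y2 - y1)))
%%%        return out
%%%
%%%    def voronoi_cell(site, neighbors, bound=1e6):
%%%        # Start from a large bounding square and clip it by the
%%%        # bisector of `site` and each neighbor; the rest is the cell.
%%%        px, py = site
%%%        cell = [(-bound, -bound), (bound, -bound),
%%%                (bound, bound), (-bound, bound)]
%%%        for qx, qy in neighbors:
%%%            # points nearer to `site` than to (qx, qy) satisfy
%%%            # 2(qx-px)x + 2(qy-py)y <= qx^2 + qy^2 - px^2 - py^2
%%%            cell = clip(cell, 2 * (qx - px), 2 * (qy - py),
%%%                        qx * qx + qy * qy - px * px - py * py)
%%%        return cell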

@Article{Vlachos:2009:OPV,
  author =       "Michail Vlachos and Aris Anagnostopoulos and Olivier
                 Verscheure and Philip S. Yu",
  title =        "Online pairing of {VoIP} conversations",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "77--98",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0087-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper answers the following question; given a
                 multiplicity of evolving 1-way conversations, can a
                 machine or an algorithm discern the conversational
                 pairs in an online fashion, without understanding the
                 content of the communications? Our analysis indicates
                 that this is possible, and can be achieved just by
                 exploiting the temporal dynamics inherent in a
                 conversation. We also show that our findings are
                 applicable for anonymous and encrypted conversations
                 over VoIP networks. We achieve this by exploiting the
                 aperiodic inter-departure time of VoIP packets, hence
                  reducing each VoIP stream to a binary
                 time-series, indicating the voice activity of each
                 stream. We propose effective techniques that
                 progressively pair conversing parties with high
                 accuracy and in a limited amount of time. Our findings
                 are verified empirically on a dataset consisting of
                 1,000 conversations. We obtain very high pairing
                 accuracy that reaches 97\% after 5 min of voice
                 conversations. Using a modeling approach we also
                 demonstrate analytically that our result can be
                 extended over an unlimited number of conversations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Binary time-series clustering; Conversation pairing;
                 Stream clustering; Voice-over-IP",
}
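
%%% The entry above pairs one-way VoIP streams by their voice-activity
%%% dynamics alone.  A small batch stand-in for the paper's progressive
%%% method (all names ours): score a pair by how much less double-talk
%%% it shows than independence would predict, then match greedily.
%%%
%%%    from itertools import combinations
%%%
%%%    def pair_score(a, b):
%%%        # a, b: equal-length 0/1 voice-activity series (1 = talking).
%%%        both = sum(x & y for x, y in zip(a, b))
%%%        expected = sum(a) * sum(b) / len(a)   # if streams independent
%%%        return expected - both                # higher = conversation-like
%%%
%%%    def pair_streams(streams):
%%%        scores = sorted(((pair_score(streams[i], streams[j]), i, j)
%%%                         for i, j in combinations(range(len(streams)), 2)),
%%%                        reverse=True)
%%%        used, pairs = set(), []
%%%        for _, i, j in scores:
%%%            if i not in used and j not in used:
%%%                pairs.append((i, j))
%%%                used.update((i, j))
%%%        return pairs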

@Article{Yao:2009:LMK,
  author =       "Yuxia Yao and Xueyan Tang and Ee-Peng Lim",
  title =        "Localized monitoring of {kNN} queries in wireless
                 sensor networks",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "99--117",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0089-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Wireless sensor networks have been widely used in
                 civilian and military applications. Primarily designed
                 for monitoring purposes, many sensor applications
                 require continuous collection and processing of sensed
                 data. Due to the limited power supply for sensor nodes,
                 energy efficiency is a major performance concern in
                 query processing. In this paper, we focus on continuous
                  kNN query processing in object-tracking sensor
                 networks. We propose a localized scheme to monitor
                 nearest neighbors to a query point. The key idea is to
                 establish a monitoring area for each query so that only
                 the updates relevant to the query are collected. The
                  monitoring area is set up when the kNN query is
                 initially evaluated and is expanded and shrunk on the
                 fly upon object movement. We analyze the optimal
                 maintenance of the monitoring area and develop an
                 adaptive algorithm to dynamically decide when to shrink
                 the monitoring area. Experimental results show that
                  establishing a monitoring area for continuous kNN
                 query processing greatly reduces energy consumption and
                 prolongs network lifetime.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
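
%%% The entry above maintains a per-query monitoring area so that only
%%% relevant sensor updates are collected.  A toy sketch of the core
%%% idea (the slack factor and names are assumptions, not the paper's):
%%%
%%%    import math
%%%
%%%    def monitoring_radius(query, objects, k, slack=1.2):
%%%        # Distance to the k-th nearest object, padded so that small
%%%        # object movements do not immediately invalidate the area.
%%%        dists = sorted(math.dist(query, o) for o in objects)
%%%        return dists[k - 1] * slack   # assumes len(objects) >= k
%%%
%%%    def relevant_updates(query, updates, radius):
%%%        # Only updates inside the monitoring area reach the sink.
%%%        return [u for u in updates if math.dist(query, u) <= radius]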

@Article{Paton:2009:AQP,
  author =       "Norman W. Paton and Jorge Buenabad-Chavez and Mengsong
                 Chen and Vijayshankar Raman and Garret Swart and
                 Inderpal Narang and Daniel M. Yellin and Alvaro
                 A. Fernandes",
  title =        "Autonomic query parallelization using non-dedicated
                 computers: an evaluation of adaptivity options",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "119--140",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-007-0090-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Writing parallel programs that can take advantage of
                 non-dedicated processors is much more difficult than
                 writing such programs for networks of dedicated
                 processors. In a non-dedicated environment such
                 programs must use autonomic techniques to respond to
                 the unpredictable load fluctuations that prevail in the
                 computational environment. In adaptive query processing
                 (AQP), several techniques have been proposed for
                 dynamically redistributing processor load assignments
                 throughout a computation to take account of varying
                 resource capabilities, but we know of no previous study
                 that compares their performance. This paper presents a
                 simulation-based evaluation of these autonomic
                 parallelization techniques in a uniform environment and
                 compares how well they improve the performance of the
                 computation. Four published strategies are compared
                 with a new algorithm that seeks to overcome some
                 weaknesses identified in the existing approaches. In
                 addition, we explore the use of techniques from online
                 algorithms to provide a firm foundation for determining
                 when to adapt in two of the existing algorithms. The
                 evaluations identify situations in which each strategy
                 may be used effectively and in which it should be
                 avoided.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Park:2009:ESR,
  author =       "Laurence A. Park and Kotagiri Ramamohanarao",
  title =        "Efficient storage and retrieval of probabilistic
                 latent semantic information for information retrieval",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "141--155",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0093-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Probabilistic latent semantic analysis (PLSA) is a
                 method for computing term and document relationships
                 from a document set. The probabilistic latent semantic
                 index (PLSI) has been used to store PLSA information,
                 but unfortunately the PLSI uses excessive storage space
                 relative to a simple term frequency index, which causes
                 lengthy query times. To overcome the storage and speed
                 problems of PLSI, we introduce the probabilistic latent
                  semantic thesaurus (PLST): an efficient and effective
                 method of storing the PLSA information. We show that
                 through methods such as document thresholding and term
                 pruning, we are able to maintain the high precision
                  results found using PLSA while using only a very small
                  percentage (0.15\%) of the storage space of PLSI.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Probabilistic latent semantic analysis; Query
                 expansion; Thesaurus",
}
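
%%% The entry above stores term relationships as a pruned, thresholded
%%% thesaurus.  A minimal sketch of that storage idea, using raw
%%% co-occurrence weights as a stand-in for the PLSA statistics the
%%% paper actually stores (all names ours):
%%%
%%%    from collections import defaultdict
%%%
%%%    def build_thesaurus(doc_term, threshold=0.01, top_m=10):
%%%        # doc_term: {doc: {term: weight}}
%%%        rel = defaultdict(lambda: defaultdict(float))
%%%        for terms in doc_term.values():
%%%            for t, wt in terms.items():
%%%                for u, wu in terms.items():
%%%                    if t != u:
%%%                        rel[t][u] += wt * wu
%%%        # Thresholding and term pruning shrink the index: keep only
%%%        # the top_m related terms per term, above a weight floor.
%%%        return {t: dict(sorted(((u, w) for u, w in us.items()
%%%                                if w >= threshold),
%%%                               key=lambda p: -p[1])[:top_m])
%%%                for t, us in rel.items()}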

@Article{Askitis:2009:BTD,
  author =       "Nikolas Askitis and Justin Zobel",
  title =        "{B}-tries for disk-based string management",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "157--179",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0094-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A wide range of applications require that large
                 quantities of data be maintained in sort order on disk.
                 The B-tree, and its variants, are an efficient
                 general-purpose disk-based data structure that is
                 almost universally used for this task. The B-trie has
                 the potential to be a competitive alternative for the
                 storage of data where strings are used as keys, but has
                 not previously been thoroughly described or tested. We
                 propose new algorithms for the insertion, deletion, and
                 equality search of variable-length strings in a
                 disk-resident B-trie, as well as novel splitting
                 strategies which are a critical element of a practical
                 implementation. We experimentally compare the B-trie
                 against variants of B-tree on several large sets of
                 strings with a range of characteristics. Our results
                 demonstrate that, although the B-trie uses more memory,
                 it is faster, more scalable, and requires less disk
                 space.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "B-tree; Burst trie; Data structures; Secondary
                 storage; Vocabulary accumulation; Word-level indexing",
}
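
%%% The entry above builds on the burst trie.  A minimal in-memory
%%% sketch of bursting (the disk layout and splitting strategies that
%%% make it a B-trie are the paper's real subject and are omitted):
%%%
%%%    class BurstTrie:
%%%        LIMIT = 4   # container capacity before bursting (toy value)
%%%
%%%        def __init__(self):
%%%            self.children = {}    # first character -> child trie
%%%            self.container = []   # suffixes parked at this node
%%%            self.eos = False      # a string ends exactly here
%%%
%%%        def insert(self, s):
%%%            if not s:
%%%                self.eos = True
%%%            elif s[0] in self.children:
%%%                self.children[s[0]].insert(s[1:])
%%%            else:
%%%                self.container.append(s)
%%%                if len(self.container) > self.LIMIT:
%%%                    # burst: push all parked suffixes one level down
%%%                    pending, self.container = self.container, []
%%%                    for t in pending:
%%%                        self.children.setdefault(t[0], BurstTrie()).insert(t[1:])
%%%
%%%        def contains(self, s):
%%%            if not s:
%%%                return self.eos
%%%            if s[0] in self.children:
%%%                return self.children[s[0]].contains(s[1:])
%%%            return s in self.container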

@Article{Joshi:2009:SBE,
  author =       "Shantanu Joshi and Christopher Jermaine",
  title =        "Sampling-based estimators for subset-based queries",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "181--202",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0095-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We consider the problem of using sampling to estimate
                 the result of an aggregation operation over a
                 subset-based SQL query, where a subquery is correlated
                 to an outer query by a NOT EXISTS, NOT IN, EXISTS or IN
                  clause. We design an estimator for such queries and
                  prove that it is unbiased. We then provide a
                 second, biased estimator that makes use of the
                 superpopulation concept from statistics to minimize the
                 mean squared error of the resulting estimate. The two
                 estimators are tested over an extensive set of
                 experiments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Aggregate query processing; Approximate query
                 processing; Sampling",
}
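
%%% The entry above estimates aggregates over subset-based queries from
%%% a sample.  A sketch of the plain unbiased estimator (the paper's
%%% refinement is the biased, superpopulation-based one); names ours:
%%%
%%%    import random
%%%
%%%    def estimate_sum(outer, f, qualifies, n=100):
%%%        # Estimates SELECT SUM(f(t)) FROM outer t WHERE <subset pred>.
%%%        # `qualifies(t)` evaluates the (NOT) EXISTS / (NOT) IN
%%%        # predicate for one tuple, so only the n sampled tuples ever
%%%        # need the subquery evaluated.  Unbiased since each tuple is
%%%        # sampled with probability n/len(outer).
%%%        sample = random.sample(outer, n)
%%%        return (len(outer) / n) * sum(f(t) for t in sample if qualifies(t))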

@Article{Sacharidis:2009:HCW,
  author =       "Dimitris Sacharidis and Antonios Deligiannakis and
                 Timos Sellis",
  title =        "Hierarchically compressed wavelet synopses",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "203--231",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0096-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The wavelet decomposition is a proven tool for
                 constructing concise synopses of large data sets that
                 can be used to obtain fast approximate answers.
                 Existing research studies focus on selecting an optimal
                 set of wavelet coefficients to store so as to minimize
                 some error metric, without however seeking to reduce
                 the size of the wavelet coefficients themselves. In
                 many real data sets the existence of large spikes in
                 the data values results in many large coefficient
                 values lying on paths of a conceptual tree structure
                 known as the error tree. To exploit this fact, we
                 introduce in this paper a novel compression scheme for
                 wavelet synopses, termed hierarchically compressed
                 wavelet synopses, that fully exploits hierarchical
                 relationships among coefficients in order to reduce
                 their storage. Our proposed compression scheme allows
                 for a larger number of coefficients to be stored for a
                 given space constraint thus resulting in increased
                 accuracy of the produced synopsis. We propose optimal,
                 approximate and greedy algorithms for constructing
                 hierarchically compressed wavelet synopses that
                 minimize the sum squared error while not exceeding a
                 given space budget. Extensive experimental results on
                 both synthetic and real-world data sets validate our
                 novel compression scheme and demonstrate the
                 effectiveness of our algorithms against existing
                 synopsis construction algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Compression; Data streams; Wavelet synopsis",
}
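
%%% The entry above compresses wavelet synopses.  For reference, a
%%% sketch of the uncompressed baseline it improves on: a Haar
%%% decomposition whose b largest coefficients are retained (per-level
%%% normalization, needed for the exact L2 guarantee, is omitted):
%%%
%%%    def haar(data):
%%%        # Unnormalized Haar transform; len(data) must be a power of 2.
%%%        coeffs, cur = [], list(data)
%%%        while len(cur) > 1:
%%%            avgs = [(cur[i] + cur[i + 1]) / 2 for i in range(0, len(cur), 2)]
%%%            dets = [(cur[i] - cur[i + 1]) / 2 for i in range(0, len(cur), 2)]
%%%            coeffs = dets + coeffs
%%%            cur = avgs
%%%        return cur + coeffs   # [overall average, details coarse->fine]
%%%
%%%    def top_b_synopsis(data, b):
%%%        # Keep the b largest coefficients by magnitude, drop the rest.
%%%        c = haar(data)
%%%        keep = sorted(range(len(c)), key=lambda i: -abs(c[i]))[:b]
%%%        return {i: c[i] for i in sorted(keep)}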

@Article{Theodoratos:2009:CPS,
  author =       "Dimitri Theodoratos and Pawel Placek and Theodore
                 Dalamagas and Stefanos Souldatos and Timos Sellis",
  title =        "Containment of partially specified tree-pattern
                 queries in the presence of dimension graphs",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "233--254",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0097-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Nowadays, huge volumes of data are organized or
                 exported in tree-structured form. Querying capabilities
                 are provided through tree-pattern queries. The need for
                 querying tree-structured data sources when their
                 structure is not fully known, and the need to integrate
                 multiple data sources with different tree structures
                  have recently driven the proposal of query
                 languages that relax the complete specification of a
                 tree pattern. In this paper, we consider a query
                 language that allows the partial specification of a
                 tree pattern. Queries in this language range from
                 structureless keyword-based queries to completely
                 specified tree patterns. To support the evaluation of
                 partially specified queries, we use semantically rich
                 constructs, called dimension graphs, which abstract
                 structural information of the tree-structured data. We
                 address the problem of query containment in the
                 presence of dimension graphs and we provide necessary
                 and sufficient conditions for query containment. As
                 checking query containment can be expensive, we suggest
                 two heuristic approaches for query containment in the
                 presence of dimension graphs. Our approaches are based
                 on extracting structural information from the dimension
                 graph that can be added to the queries while preserving
                 equivalence with respect to the dimension graph. We
                 considered both cases: extracting and storing different
                 types of structural information in advance, and
                 extracting information on-the-fly (at query time). Both
                 approaches are implemented, validated, and compared
                 through experimental evaluation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Partial tree-pattern query; Query containment;
                 Tree-structured data; XML",
}

@Article{Benjelloun:2009:SGA,
  author =       "Omar Benjelloun and Hector Garcia-Molina and David
                 Menestrina and Qi Su and Steven Euijong Whang and
                 Jennifer Widom",
  title =        "{Swoosh}: a generic approach to entity resolution",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "255--276",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0098-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We consider the entity resolution (ER) problem (also
                  known as deduplication or merge/purge), in which
                 records determined to represent the same real-world
                 entity are successively located and merged. We
                 formalize the generic ER problem, treating the
                 functions for comparing and merging records as
                 black-boxes, which permits expressive and extensible ER
                 solutions. We identify four important properties that,
                 if satisfied by the match and merge functions, enable
                 much more efficient ER algorithms. We develop three
                 efficient ER algorithms: G-Swoosh for the case where
                 the four properties do not hold, and R-Swoosh and
                 F-Swoosh that exploit the four properties. F-Swoosh in
                 addition assumes knowledge of the 'features' (e.g.,
                 attributes) used by the match function. We
                 experimentally evaluate the algorithms using comparison
                 shopping data from Yahoo! Shopping and hotel
                 information data from Yahoo! Travel. We also show that
                 R-Swoosh (and F-Swoosh) can be used even when the four
                 match and merge properties do not hold, if an
                 'approximate' result is acceptable.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data cleaning; Entity resolution; Generic entity
                 resolution",
}
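
%%% The entry above defines generic ER over black-box match and merge
%%% functions.  A compact Python rendering of R-Swoosh as the abstract
%%% describes it (correctness relies on the paper's four match/merge
%%% properties; `match` and `merge` are user-supplied black boxes):
%%%
%%%    def r_swoosh(records, match, merge):
%%%        # R holds unresolved records; Rp holds mutually non-matching
%%%        # ones.  Matches are merged and recycled through R.
%%%        R, Rp = list(records), []
%%%        while R:
%%%            r = R.pop()
%%%            buddy = next((s for s in Rp if match(r, s)), None)
%%%            if buddy is None:
%%%                Rp.append(r)
%%%            else:
%%%                Rp.remove(buddy)
%%%                R.append(merge(r, buddy))
%%%        return Rp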

@Article{Ratprasartporn:2009:CBL,
  author =       "Nattakarn Ratprasartporn and Jonathan Po and Ali
                 Cakmak and Sulieman Bani-Ahmad and Gultekin Ozsoyoglu",
  title =        "Context-based literature digital collection search",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "277--301",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0099-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We identify two issues with searching literature
                 digital collections within digital libraries: (a) there
                 are no effective paper-scoring and ranking mechanisms.
                 Without a scoring and ranking system, users are often
                 forced to scan a large and diverse set of publications
                 listed as search results and potentially miss the
                 important ones. (b) Topic diffusion is a common
                 problem: publications returned by a keyword-based
                 search query often fall into multiple topic areas, not
                 all of which are of interest to users. This paper
                 proposes a new literature digital collection search
                 paradigm that effectively ranks search outputs, while
                 controlling the diversity of keyword-based search query
                 output topics. Our approach is as follows. First,
                 during pre-querying, publications are assigned into
                 pre-specified ontology-based contexts, and
                 query-independent context scores are attached to papers
                 with respect to the assigned contexts. When a query is
                 posed, relevant contexts are selected, search is
                 performed within the selected contexts, context scores
                 of publications are revised into relevancy scores with
                 respect to the query at hand and the context that they
                 are in, and query outputs are ranked within each
                 relevant context. This way, we (1) minimize query
                 output topic diversity, (2) reduce query output size,
                 (3) decrease user time spent scanning query results,
                 and (4) increase query output ranking accuracy. Using
                 genomics-oriented PubMed publications as the testbed
                 and Gene Ontology terms as contexts, our experiments
                 indicate that the proposed context-based search
                 approach produces search results with up to 50\% higher
                 precision, and reduces the query output size by up to
                 70\%.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Context score; Context-based search; Digital
                 collections; Ontology; Ranking",
}

@Article{Chiu:2009:EFS,
  author =       "Ding-Ying Chiu and Yi-Hung Wu and Arbee L. Chen",
  title =        "Efficient frequent sequence mining by a dynamic
                 strategy switching algorithm",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "303--327",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0100-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Mining frequent sequences in large databases has been
                 an important research topic. The main challenge of
                 mining frequent sequences is the high processing cost
                 due to the large amount of data. In this paper, we
                 propose a novel strategy to find all the frequent
                 sequences without having to compute the support counts
                 of non-frequent sequences. The previous works prune
                 candidate sequences based on the frequent sequences
                 with shorter lengths, while our strategy prunes
                 candidate sequences according to the non-frequent
                 sequences with the same lengths. As a result, our
                 strategy can cooperate with the previous works to
                 achieve a better performance. We then identify three
                 major strategies used in the previous works and combine
                 them with our strategy into an efficient algorithm. The
                 novelty of our algorithm lies in its ability to
                 dynamically switch from a previous strategy to our new
                 strategy in the mining process for a better
                 performance. Experiment results show that our algorithm
                 outperforms the previous ones under various parameter
                 settings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data mining; Frequent sequence; Sequence comparison;
                 Strategy switching",
}

@Article{Shen:2009:SII,
  author =       "Heng Tao Shen and Shouxu Jiang and Kian-Lee Tan and Zi
                 Huang and Xiaofang Zhou",
  title =        "Speed up interactive image retrieval",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "329--343",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0101-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In multimedia retrieval, a query is typically
                 interactively refined towards the 'optimal' answers by
                 exploiting user feedback. However, in existing work, in
                 each iteration, the refined query is re-evaluated. This
                 is not only inefficient but fails to exploit the
                 answers that may be common between iterations.
                 Furthermore, it may also take too many iterations to
                 get the 'optimal' answers. In this paper, we introduce
                 a new approach called OptRFS (optimizing relevance
                 feedback search by query prediction) for iterative
                 relevance feedback search. OptRFS aims to take users to
                 view the 'optimal' results as fast as possible. It
                 optimizes relevance feedback search by both shortening
                 the searching time during each iteration and reducing
                 the number of iterations. OptRFS predicts the potential
                 candidates for the next iteration and maintains this
                 small set for efficient sequential scan. By doing so,
                 repeated candidate accesses (i.e., random accesses) can
                 be saved, hence reducing the searching time for the
                 next iteration. In addition, efficient scan on the
                 overlap before the next search starts also tightens the
                 search space with smaller pruning radius. As a step
                 forward, OptRFS also predicts the 'optimal' query,
                 which corresponds to 'optimal' answers, based on the
                 early executed iterations' queries. By doing so, some
                 intermediate iterations can be saved, hence reducing
                 the total number of iterations. By taking the
                 correlations among the early executed iterations into
                 consideration, OptRFS investigates linear regression,
                 exponential smoothing and linear exponential smoothing
                 to predict the next refined query so as to decide the
                 overlap of candidates between two consecutive
                 iterations. Considering the special features of
                 relevance feedback, OptRFS further introduces adaptive
                 linear exponential smoothing to self-adjust the
                 parameters for more accurate prediction. We implemented
                  OptRFS and our experimental study on real-life data
                  sets shows that it can reduce the total cost of
                 relevance feedback search significantly. Some
                 interesting features of relevance feedback search are
                 also discovered and discussed.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Image retrieval; Indexing; Query processing; Relevance
                 feedback",
}
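
%%% The entry above predicts the next refined query from earlier
%%% feedback iterations.  A sketch of linear (Holt) exponential
%%% smoothing over query vectors, one of the predictors the abstract
%%% names; parameter values are arbitrary, and with trend weight
%%% beta=0 it degenerates to simple exponential smoothing:
%%%
%%%    def holt_predict(queries, alpha=0.5, beta=0.3):
%%%        # queries: refined query vectors from iterations 1..t;
%%%        # returns a per-dimension extrapolation for iteration t+1.
%%%        level = list(queries[0])
%%%        trend = [0.0] * len(level)
%%%        for q in queries[1:]:
%%%            for i, qi in enumerate(q):
%%%                prev = level[i]
%%%                level[i] = alpha * qi + (1 - alpha) * (level[i] + trend[i])
%%%                trend[i] = beta * (level[i] - prev) + (1 - beta) * trend[i]
%%%        return [l + t for l, t in zip(level, trend)]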

@Article{Wang:2009:SFS,
  author =       "Shiyuan Wang and Quang Hieu Vu and Beng Chin Ooi and
                 Anthony K. Tung and Lizhen Xu",
  title =        "{Skyframe}: a framework for skyline query processing
                 in peer-to-peer systems",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "345--362",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0104-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper looks at the processing of skyline queries
                 on peer-to-peer (P2P) networks. We propose Skyframe, a
                 framework for efficient skyline query processing in P2P
                 systems, which addresses the challenges of quick
                 response time, low network communication cost and query
                 load balancing among peers. Skyframe consists of two
                 querying methods: one is optimized for network
                 communication while the other focuses on query response
                 time. These methods are different in the way in which
                 the query search space is defined. In particular, the
                 first method uses a high dominating point that has a
                 large dominating region to prune the search space to
                 achieve a low cost in network communication. On the
                 other hand, the second method relaxes the search space
                 in order to allow parallel query processing to speed up
                 query response. Skyframe achieves query load balancing
                 by both query load conscious data space
                 splitting/merging during the join/departure of nodes
                 and dynamic load migration. We further show how to
                 apply Skyframe to both the P2P systems supporting
                 multi-dimensional indexing and the P2P systems
                 supporting single-dimensional indexing. Finally, we
                 have conducted extensive experiments on both real and
                 synthetic data sets over two existing P2P systems: CAN
                 (Ratnasamy in A scalable content-addressable network.
                  In: Proceedings of SIGCOMM Conference, pp. 161--172,
                 2001) and BATON (Jagadish et al. in A balanced tree
                 structure for peer-to-peer networks. In: Proceedings of
                  VLDB Conference, pp. 661--672, 2005) to evaluate the
                 effectiveness and scalability of Skyframe.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Framework; Load balancing; Optimization; Peer-to-peer
                 systems; Skyline query processing",
}
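
%%% The entry above prunes skyline search with a high dominating point.
%%% A centralized toy version of that pruning (the paper's contribution
%%% is doing this across P2P nodes; smaller values are better here):
%%%
%%%    def dominates(p, q):
%%%        return (all(a <= b for a, b in zip(p, q)) and
%%%                any(a < b for a, b in zip(p, q)))
%%%
%%%    def skyline(points, hdp=None):
%%%        # hdp: a data point with a large dominating region; anything
%%%        # it dominates can be discarded before the pairwise pass.
%%%        cand = [p for p in points if hdp is None or not dominates(hdp, p)]
%%%        return [p for p in cand
%%%                if not any(dominates(q, p) for q in cand if q != p)]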

@Article{Mouratidis:2009:PMD,
  author =       "Kyriakos Mouratidis and Dimitris Sacharidis and
                 Hweehwa Pang",
  title =        "Partially materialized digest scheme: an efficient
                 verification method for outsourced databases",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "1",
  pages =        "363--381",
  month =        jan,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0108-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 15:49:59 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In the outsourced database model, a data owner
                 publishes her database through a third-party server;
                 i.e., the server hosts the data and answers user
                 queries on behalf of the owner. Since the server may
                 not be trusted, or may be compromised, users need a
                 means to verify that answers received are both
                 authentic and complete, i.e., that the returned data
                 have not been tampered with, and that no qualifying
                 results have been omitted. We propose a result
                 verification approach for one-dimensional queries,
                 called Partially Materialized Digest scheme (PMD), that
                 applies to both static and dynamic databases. PMD uses
                 separate indexes for the data and for their associated
                 verification information, and only partially
                 materializes the latter. In contrast with previous
                 work, PMD avoids unnecessary costs when processing
                 queries that do not request verification, achieving the
                  performance of an ordinary index (e.g., a B$^+$-tree).
                 On the other hand, when an authenticity and
                 completeness proof is required, PMD outperforms the
                 existing state-of-the-art technique by a wide margin,
                 as we demonstrate analytically and experimentally.
                 Furthermore, we design two verification methods for
                 spatial queries. The first, termed Merkle R-tree
                 (MR-tree), extends the conventional approach of
                 embedding authentication information into the data
                 index (i.e., an R-tree). The second, called Partially
                 Materialized KD-tree (PMKD), follows the PMD paradigm
                 using separate data and verification indexes. An
                 empirical evaluation with real data shows that the PMD
                 methodology is superior to the traditional approach for
                 spatial queries too.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Authentication in outsourced databases; Query result
                 verification",
}
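
%%% The entry above authenticates query results with digest structures.
%%% A sketch of the underlying Merkle-tree principle (the paper's
%%% MR-tree and PMD embed this in spatial and partially materialized
%%% indexes; this standalone version is ours):
%%%
%%%    import hashlib
%%%
%%%    def h(x):
%%%        return hashlib.sha256(x).digest()
%%%
%%%    def build_levels(leaves):
%%%        levels = [[h(x) for x in leaves]]
%%%        while len(levels[-1]) > 1:
%%%            lvl = levels[-1][:]
%%%            if len(lvl) % 2:
%%%                lvl.append(lvl[-1])            # duplicate odd tail
%%%            levels.append([h(lvl[i] + lvl[i + 1])
%%%                           for i in range(0, len(lvl), 2)])
%%%        return levels                          # levels[-1][0] is the root
%%%
%%%    def proof(leaves, i):
%%%        # Sibling hashes from leaf i up to the root.
%%%        path, levels = [], build_levels(leaves)
%%%        for lvl in levels[:-1]:
%%%            lvl = lvl + [lvl[-1]] if len(lvl) % 2 else lvl
%%%            sib = i ^ 1
%%%            path.append((sib < i, lvl[sib]))   # (sibling on left?, hash)
%%%            i //= 2
%%%        return path
%%%
%%%    def verify(leaf, path, root):
%%%        cur = h(leaf)
%%%        for left, sib in path:
%%%            cur = h(sib + cur) if left else h(cur + sib)
%%%        return cur == root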

@Article{Garofalakis:2009:SIB,
  author =       "Minos Garofalakis and Johannes Gehrke and Divesh
                 Srivastava",
  title =        "Special issue: best papers of {VLDB 2007}",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "2",
  pages =        "383--384",
  month =        apr,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0132-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 13 09:15:13 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Abadi:2009:SSV,
  author =       "Daniel J. Abadi and Adam Marcus and Samuel R. Madden
                 and Kate Hollenbach",
  title =        "{SW-Store}: a vertically partitioned {DBMS} for
                 {Semantic Web} data management",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "2",
  pages =        "385--406",
  month =        apr,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0125-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 13 09:15:13 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Efficient management of RDF data is an important
                 prerequisite for realizing the Semantic Web vision.
                 Performance and scalability issues are becoming
                 increasingly pressing as Semantic Web technology is
                 applied to real-world applications. In this paper, we
                 examine the reasons why current data management
                 solutions for RDF data scale poorly, and explore the
                 fundamental scalability limitations of these
                 approaches. We review the state of the art for
                 improving performance of RDF databases and consider a
                 recent suggestion, 'property tables'. We then discuss
                 practically and empirically why this solution has
                 undesirable features. As an improvement, we propose an
                 alternative solution: vertically partitioning the RDF
                 data. We compare the performance of vertical
                 partitioning with prior art on queries generated by a
                 Web-based RDF browser over a large-scale (more than 50
                 million triples) catalog of library data. Our results
                 show that a vertically partitioned schema achieves
                 similar performance to the property table technique
                 while being much simpler to design. Further, if a
                 column-oriented DBMS (a database architected specially
                 for the vertically partitioned case) is used instead of
                 a row-oriented DBMS, another order of magnitude
                 performance improvement is observed, with query times
                 dropping from minutes to several seconds. Encouraged by
                 these results, we describe the architecture of
                 SW-Store, a new DBMS we are actively building that
                 implements these techniques to achieve high performance
                 RDF data management.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
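
%%% The entry above vertically partitions RDF into one two-column table
%%% per property.  A tiny illustration of the layout and the cheap
%%% subject-subject merge join it enables (names ours):
%%%
%%%    from collections import defaultdict
%%%
%%%    def vertical_partition(triples):
%%%        # (subject, property, object) -> {property: sorted [(s, o)]}
%%%        tables = defaultdict(list)
%%%        for s, p, o in triples:
%%%            tables[p].append((s, o))
%%%        return {p: sorted(rows) for p, rows in tables.items()}
%%%
%%%    def merge_join(t1, t2):
%%%        # Join two property tables on subject; both sorted on subject.
%%%        out, i, j = [], 0, 0
%%%        while i < len(t1) and j < len(t2):
%%%            if t1[i][0] < t2[j][0]:
%%%                i += 1
%%%            elif t1[i][0] > t2[j][0]:
%%%                j += 1
%%%            else:
%%%                s, i2, j2 = t1[i][0], i, j
%%%                while i2 < len(t1) and t1[i2][0] == s:
%%%                    i2 += 1
%%%                while j2 < len(t2) and t2[j2][0] == s:
%%%                    j2 += 1
%%%                out.extend((s, t1[a][1], t2[b][1])
%%%                           for a in range(i, i2) for b in range(j, j2))
%%%                i, j = i2, j2
%%%        return out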

@Article{Arai:2009:AMT,
  author =       "Benjamin Arai and Gautam Das and Dimitrios Gunopulos
                 and Nick Koudas",
  title =        "Anytime measures for top-$k$ algorithms on exact and
                 fuzzy data sets",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "2",
  pages =        "407--427",
  month =        apr,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0127-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 13 09:15:13 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Top- k queries on large multi-attribute data sets are
                 fundamental operations in information retrieval and
                 ranking applications. In this article, we initiate
                 research on the anytime behavior of top- k algorithms
                 on exact and fuzzy data. In particular, given specific
                 top- k algorithms (TA and TA-Sorted) we are interested
                 in studying their progress toward identification of the
                 correct result at any point during the algorithms'
                 execution. We adopt a probabilistic approach where we
                 seek to report at any point of operation of the
                 algorithm the confidence that the top- k result has
                 been identified. Such a functionality can be a valuable
                 asset when one is interested in reducing the runtime
                 cost of top- k computations. We present a thorough
                 experimental evaluation to validate our techniques
                 using both synthetic and real data sets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Anytime; Approximate query; Fuzzy data; Top-k",
}
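
%%% The entry above studies anytime confidence for TA.  For reference,
%%% a sketch of the plain threshold algorithm the paper instruments
%%% (assumes every ranking covers the same object set; names ours):
%%%
%%%    import heapq
%%%
%%%    def threshold_algorithm(lists, k):
%%%        # lists: one per attribute, each [(obj, score), ...] sorted by
%%%        # score descending; overall score = sum of attribute scores.
%%%        lookup = [dict(lst) for lst in lists]   # simulated random access
%%%        seen, topk = set(), []                  # topk: min-heap (score, obj)
%%%        for depth in range(len(lists[0])):
%%%            # best total any still-unseen object could reach
%%%            threshold = sum(lst[depth][1] for lst in lists)
%%%            for lst in lists:
%%%                obj = lst[depth][0]
%%%                if obj not in seen:
%%%                    seen.add(obj)
%%%                    total = sum(tbl[obj] for tbl in lookup)
%%%                    heapq.heappush(topk, (total, obj))
%%%                    if len(topk) > k:
%%%                        heapq.heappop(topk)
%%%            if len(topk) == k and topk[0][0] >= threshold:
%%%                break                           # TA stopping rule
%%%        return sorted(topk, reverse=True)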

@Article{Chen:2009:AKD,
  author =       "Bee-Chung Chen and Kristen Lefevre and Raghu
                 Ramakrishnan",
  title =        "Adversarial-knowledge dimensions in data privacy",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "2",
  pages =        "429--467",
  month =        apr,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0118-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 13 09:15:13 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Privacy is an important issue in data publishing. Many
                 organizations distribute non-aggregate personal data
                 for research, and they must take steps to ensure that
                 an adversary cannot predict sensitive information
                 pertaining to individuals with high confidence. This
                 problem is further complicated by the fact that, in
                 addition to the published data, the adversary may also
                 have access to other resources (e.g., public records
                 and social networks relating individuals), which we
                 call adversarial knowledge. A robust privacy framework
                 should allow publishing organizations to analyze data
                 privacy by means of not only data dimensions (data that
                 a publishing organization has), but also
                 adversarial-knowledge dimensions (information not in
                 the data). In this paper, we first describe a general
                 framework for reasoning about privacy in the presence
                 of adversarial knowledge. Within this framework, we
                 propose a novel multidimensional approach to
                 quantifying adversarial knowledge. This approach allows
                 the publishing organization to investigate privacy
                 threats and enforce privacy requirements in the
                 presence of various types and amounts of adversarial
                 knowledge. Our main technical contributions include a
                 multidimensional privacy criterion that is more
                 intuitive and flexible than previous approaches to
                 modeling background knowledge. In addition, we identify
                 an important congregation property of the
                 adversarial-knowledge dimensions. Based on this
                 property, we provide algorithms for measuring
                 disclosure and sanitizing data that improve
                 computational efficiency several orders of magnitude
                 over the best known techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Anonymization; Knowledge expression;
                 Privacy-preserving data publishing; Probabilistic
                 inference; Skyline; Worst-case privacy",
}

@Article{Dong:2009:DIU,
  author =       "Xin Luna Dong and Alon Halevy and Cong Yu",
  title =        "Data integration with uncertainty",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "2",
  pages =        "469--500",
  month =        apr,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0119-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 13 09:15:13 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper reports our first set of results on
                 managing uncertainty in data integration. We posit that
                 data-integration systems need to handle uncertainty at
                 three levels and do so in a principled fashion. First,
                 the semantic mappings between the data sources and the
                 mediated schema may be approximate because there may be
                 too many of them to be created and maintained or
                 because in some domains (e.g., bioinformatics) it is
                 not clear what the mappings should be. Second, the data
                 from the sources may be extracted using information
                 extraction techniques and so may yield erroneous data.
                 Third, queries to the system may be posed with keywords
                 rather than in a structured form. As a first step to
                 building such a system, we introduce the concept of
                 probabilistic schema mappings and analyze their formal
                 foundations. We show that there are two possible
                 semantics for such mappings: by-table semantics assumes
                 that there exists a correct mapping but we do not know
                 what it is; by-tuple semantics assumes that the correct
                 mapping may depend on the particular tuple in the
                 source data. We present the query complexity and
                 algorithms for answering queries in the presence of
                 probabilistic schema mappings, and we describe an
                  algorithm for efficiently computing the top-$k$ answers
                 to queries in such a setting. Finally, we consider
                 using probabilistic mappings in the scenario of data
                 exchange.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data exchange; Data integration; Probabilistic schema
                 mapping",
}
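
%%% The entry above introduces probabilistic schema mappings.  Under
%%% by-table semantics the answer probability is the total weight of
%%% the mappings that produce it; a minimal sketch (names ours, with
%%% `query` a callable that evaluates the query under one mapping):
%%%
%%%    from collections import defaultdict
%%%
%%%    def by_table_answers(query, mappings):
%%%        # mappings: [(probability, mapping)], probabilities sum to 1.
%%%        probs = defaultdict(float)
%%%        for p, m in mappings:
%%%            for ans in query(m):
%%%                probs[ans] += p
%%%        return dict(probs)   # answer tuple -> probability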

@Article{Gedik:2009:CPS,
  author =       "Bu{\u{g}}ra Gedik and Rajesh R. Bordawekar and Philip
                 S. Yu",
  title =        "{CellJoin}: a parallel stream join operator for the
                 {Cell} processor",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "2",
  pages =        "501--519",
  month =        apr,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0116-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 13 09:15:13 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Low-latency and high-throughput processing are key
                 requirements of data stream management systems (DSMSs).
                 Hence, multi-core processors that provide high
                 aggregate processing capacity are ideal matches for
                 executing costly DSMS operators. The recently developed
                 Cell processor is a good example of a heterogeneous
                 multi-core architecture and provides a powerful
                 platform for executing data stream operators with
                 high performance. On the downside, exploiting the full
                 potential of a multi-core processor like Cell is often
                 challenging, mainly due to the heterogeneous nature of
                 the processing elements, the software managed local
                 memory at the co-processor side, and the unconventional
                 programming model in general. In this paper, we study
                 the problem of scalable execution of windowed stream
                 join operators on multi-core processors, and
                 specifically on the Cell processor. By examining
                 various aspects of join execution flow, we determine
                 the right set of techniques to apply in order to
                 minimize the sequential segments and maximize
                 parallelism. Concretely, we show that basic windows
                 coupled with low-overhead pointer-shifting techniques
                 can be used to achieve efficient join window
                 partitioning, column-oriented join window organization
                 can be used to minimize scattered data transfers,
                 delay-optimized double buffering can be used for
                 effective pipelining, rate-aware batching can be used
                 to balance join throughput and tuple delay, and finally
                 single-instruction multiple-data (SIMD) optimized
                 operator code can be used to exploit data parallelism.
                 Our experimental results show that, following the
                 design guidelines and implementation techniques
                 outlined in this paper, windowed stream joins can
                 achieve high scalability (linear in the number of
                 co-processors) by making efficient use of the extensive
                 hardware parallelism provided by the Cell processor
                 (reaching data processing rates of $\approx$ 13 GB/s)
                 and significantly surpass the performance obtained from
                 conventional high-end processors (supporting a combined
                 input stream rate of 2,000 tuples/s using 15 min
                 windows and without dropping any tuples, resulting in
                 $\approx$ 8.3 times higher output rate compared to an
                 SSE implementation on dual 3.2 GHz Intel Xeon).",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Schnaitter:2009:DER,
  author =       "Karl Schnaitter and Joshua Spiegel and Neoklis
                 Polyzotis",
  title =        "Depth estimation for ranking query optimization",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "2",
  pages =        "521--542",
  month =        apr,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0124-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 13 09:15:13 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A relational ranking query uses a scoring function to
                 limit the results of a conventional query to a small
                 number of the most relevant answers. The increasing
                 popularity of this query paradigm has led to the
                 introduction of specialized rank join operators that
                 integrate the selection of top tuples with join
                 processing. These operators access just 'enough' of the
                 input in order to generate just 'enough' output and can
                 offer significant speed-ups for query evaluation. The
                 number of input tuples that an operator accesses is
                 called the input depth of the operator, and this is the
                 driving cost factor in rank join processing. This
                 introduces the important problem of depth estimation,
                 which is crucial for the costing of rank join operators
                 during query compilation and thus for their integration
                 in optimized physical plans. We introduce an estimation
                 methodology, termed DEEP, for approximating the input
                 depths of rank join operators in a physical execution
                 plan. At the core of DEEP lies a general, principled
                 framework that formalizes depth computation in terms of
                 the joint distribution of scores in the base tables.
                 This framework results in a systematic estimation
                 methodology that takes the characteristics of the data
                 directly into account and thus enables more accurate
                 estimates. We develop novel estimation algorithms that
                 provide an efficient realization of the formal DEEP
                 framework, and describe their integration on top of the
                 statistics module of an existing query optimizer. We
                 validate the performance of DEEP with an extensive
                 experimental study on data sets of varying
                 characteristics. The results verify the effectiveness
                 of DEEP as an estimation method and demonstrate its
                 advantages over previously proposed techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data statistics; DEEP; Depth estimation; Query
                 optimization; Relational ranking query; Top-k",
}

@Article{Shao:2009:EKS,
  author =       "Feng Shao and Lin Guo and Chavdar Botev and Anand
                 Bhaskar and Muthiah Chettiar and Fan Yang and Jayavel
                 Shanmugasundaram",
  title =        "Efficient keyword search over virtual {XML} views",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "2",
  pages =        "543--570",
  month =        apr,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0126-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 13 09:15:13 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Emerging applications such as personalized portals,
                 enterprise search, and web integration systems often
                 require keyword search over semi-structured views.
                 However, traditional information retrieval techniques
                 are likely to be expensive in this context because they
                 rely on the assumption that the set of documents being
                 searched is materialized. In this paper, we present a
                 system architecture and algorithm that can efficiently
                 evaluate keyword search queries over virtual
                 (unmaterialized) XML views. An interesting aspect of
                 our approach is that it exploits indices present on the
                 base data and thereby avoids materializing large parts
                 of the view that are not relevant to the query results.
                 Another feature of the algorithm is that by solely
                 using indices, we can still score the results of
                 queries over the virtual view, and the resulting scores
                 are the same as if the view were materialized. Our
                 performance evaluation using the INEX data set in the
                 Quark (Bhaskar et al. in Quark: an efficient XQuery
                 full-text implementation. In: SIGMOD, 2006) open-source
                 XML database system indicates that the proposed
                 approach is scalable and efficient.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Document projections; Document pruning; Keyword
                 search; Top-K; XML views",
}

@Article{Wu:2009:GEV,
  author =       "Mingxi Wu and Chris Jermaine",
  title =        "Guessing the extreme values in a data set: a
                 {Bayesian} method and its applications",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "2",
  pages =        "571--597",
  month =        apr,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0133-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 13 09:15:13 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "For a large number of data management problems, it
                 would be very useful to be able to obtain a few samples
                 from a data set, and to use the samples to guess the
                 largest (or smallest) value in the entire data set.
                 Min/max online aggregation, Top-k query processing,
                 outlier detection, and distance join are just a few
                 possible applications. This paper details a
                 statistically rigorous, Bayesian approach to attacking
                 this problem. Just as importantly, we demonstrate the
                 utility of our approach by showing how it can be
                 applied to four specific problems that arise in the
                 context of data management.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Bayesian; Extreme values; Monte Carlo; Online
                 aggregation; Sampling",
}

@Article{Hill:2009:ROJ,
  author =       "Gerhard Hill and Andrew Ross",
  title =        "Reducing outer joins",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "599--610",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0110-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We present a method for transforming some outer joins
                 to inner joins and describe a generalized semijoin
                 reduction technique. The first part of the paper shows
                 how to transform a given outer join query whose join
                 graph is a tree to an equivalent inner join query. The
                 method uses derived relations and join predicates.
                 Derived relations contain columns corresponding to join
                 conditions and may have virtual row identifiers, rows
                 and attribute values. The constructed inner join query,
                 after elimination of virtual row identifiers, has the
                 same join tuples as the outer join query. Both the
                 theoretical maximum number of virtual rows and the
                 average number in practice are shown to be low. The
                 method confines consideration of the non-associativity
                 of outer joins to a single step. The second part of the
                 paper generalizes to outer joins the well known
                 technique of semijoin reduction of inner joins. It does
                 so by defining the notions of influencing and needing,
                 and using them to define full reduction and reduction
                 plans. The technique is applied here to perform one
                 step of the method presented in the first part.
                 Semijoin reduction is useful in practice for executing
                 join queries in distributed databases.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Efficient join evaluation; Join transformation; Outer
                 join evaluation; Semijoin reduction; Virtual row
                 method",
}

@Article{Keogh:2009:SEI,
  author =       "Eamonn Keogh and Li Wei and Xiaopeng Xi and Michail
                 Vlachos and Sang-Hee Lee and Pavlos Protopapas",
  title =        "Supporting exact indexing of arbitrarily rotated
                 shapes and periodic time series under {Euclidean} and
                 warping distance measures",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "611--630",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0111-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Shape matching and indexing is important topic in its
                 own right, and is a fundamental subroutine in most
                 shape data mining algorithms. Given the ubiquity of
                 shape, shape matching is an important problem with
                 applications in domains as diverse as biometrics,
                 industry, medicine, zoology and anthropology. The
                 distance/similarity measure used for shape matching
                 must be invariant to many distortions, including scale,
                 offset, noise, articulation, partial occlusion, etc.
                 Most of these distortions are relatively easy to
                 handle, either in the representation of the data or in
                 the similarity measure used. However, rotation
                 invariance is noted in the literature as being an
                 especially difficult challenge. Current approaches
                 typically try to achieve rotation invariance in the
                 representation of the data, at the expense of
                 discrimination ability, or in the distance measure, at
                 the expense of efficiency. In this work, we show that
                 we can take the slow but accurate approaches and
                 dramatically speed them up. On real world problems our
                 technique can take current approaches and make them
                 four orders of magnitude faster without false
                 dismissals. Moreover, our technique can be used with
                 any of the dozens of existing shape representations and
                 with all the most popular distance measures including
                 Euclidean distance, dynamic time warping and Longest
                 Common Subsequence. We further show that our indexing
                 technique can be used to index star light curves, an
                 important type of astronomical data, without
                 modification.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Dynamic time warping; Indexing; Shape",
}

@Article{Yang:2009:AIO,
  author =       "Yin Yang and Stavros Papadopoulos and Dimitris
                 Papadias and George Kollios",
  title =        "Authenticated indexing for outsourced spatial
                 databases",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "631--648",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0113-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In spatial database outsourcing, a data owner
                 delegates its data management tasks to a location-based
                 service (LBS), which indexes the data with an
                 authenticated data structure (ADS). The LBS receives
                 queries (ranges, nearest neighbors) originating from
                 several clients/subscribers. Each query initiates the
                 computation of a verification object (VO) based on the
                 ADS. The VO is returned to the client that can verify
                 the result correctness using the public key of the
                 owner. Our first contribution is the MR-tree, a
                 space-efficient ADS that supports fast query processing
                 and verification. Our second contribution is the
                 MR*-tree, a modified version of the MR-tree, which
                 significantly reduces the VO size through a novel
                 embedding technique. Finally, whereas most ADSs must be
                 constructed and maintained by the owner, we outsource
                 the MR- and MR*-tree construction and maintenance to
                 the LBS, thus relieving the owner from this
                 computationally intensive task.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Authenticated index; Database outsourcing; Mobile
                 computing; Spatial database",
}

@Article{Quiane-Ruiz:2009:SAQ,
  author =       "Jorge-Arnulfo Quian{\'e}-Ruiz and Philippe Lamarre and
                 Patrick Valduriez",
  title =        "A self-adaptable query allocation framework for
                 distributed information systems",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "649--674",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0114-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In large-scale distributed information systems, where
                 participants are autonomous and have special interests
                 for some queries, query allocation is a challenge. Much
                 work in this context has focused on distributing
                 queries among providers in a way that maximizes overall
                 performance (typically throughput and response time).
                 However, preserving the participants' interests is also
                 important. In this paper, we make the following
                 contributions. First, we provide a model to define the
                 participants' perception of the system regarding their
                 interests and propose measures to evaluate the quality
                 of query allocation methods. Then, we propose a
                 framework for query allocation called
                 Satisfaction-based Query Load Balancing (SQLB, for
                 short), which dynamically trades consumers' interests
                 for providers' interests based on their satisfaction.
                 Finally, we compare SQLB, through experimentation, with
                 two important baseline query allocation methods, namely
                 Capacity-based and Mariposa-like. The results
                 demonstrate that SQLB yields high efficiency while
                 satisfying the participants' interests and
                 significantly outperforms the baseline methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Distributed information systems; Query allocation;
                 Query load balancing; Satisfaction",
}

@Article{Deng:2009:IOQ,
  author =       "Ke Deng and Xiaofang Zhou and Heng Tao Shen and Shazia
                 Sadiq and Xue Li",
  title =        "Instance optimal query processing in spatial
                 networks",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "675--693",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0115-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The performance optimization of query processing in
                 spatial networks focuses on minimizing network data
                 accesses and the cost of network distance calculations.
                 This paper proposes algorithms for network k-NN
                 queries, range queries, closest-pair queries and
                 multi-source skyline queries based on a novel
                 processing framework, namely, incremental lower bound
                 constraint. By giving high processing priority to the
                 query associated data points and utilizing the
                 incremental nature of the lower bound, the performance
                 of our algorithms is better optimized than that of
                 the corresponding algorithms based on the known frameworks of
                 incremental Euclidean restriction and incremental
                 network expansion. More importantly, the proposed
                 algorithms are proven to be instance optimal among
                 classes of algorithms. Through experiments on real road
                 network datasets, the superiority of the proposed
                 algorithms is demonstrated.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Incremental lower bound constraint; Instance
                 optimality; Spatial networks; Spatial queries",
}

@Article{Yiu:2009:MDT,
  author =       "Man Lung Yiu and Nikos Mamoulis",
  title =        "Multi-dimensional top-$k$ dominating queries",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "695--718",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0117-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The top- k dominating query returns k data objects
                 which dominate the highest number of objects in a
                 dataset. This query is an important tool for decision
                 support since it provides data analysts an intuitive
                 way for finding significant objects. In addition, it
                 combines the advantages of top-k and skyline queries
                 without sharing their disadvantages: (i) the output
                 size can be controlled, (ii) no ranking functions need
                 to be specified by users, and (iii) the result is
                 independent of the scales at different dimensions.
                 Despite their importance, top-k dominating queries
                 have not received adequate attention from the research
                 community. This paper is an extensive study on the
                 evaluation of top-k dominating queries. First, we
                 propose a set of algorithms that apply on indexed
                 multi-dimensional data. Second, we investigate query
                 evaluation on data that are not indexed. Finally, we
                 study a relaxed variant of the query which considers
                 dominance in dimensional subspaces. Experiments using
                 synthetic and real datasets demonstrate that our
                 algorithms significantly outperform a previous
                 skyline-based approach. We also illustrate the
                 applicability of this multi-dimensional analysis query
                 by studying the meaningfulness of its results on real
                 data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Preference dominance; Score counting; Top-k
                 retrieval",
}

@Article{Silva:2009:RTS,
  author =       "Yasin N. Silva and Xiaopeng Xiong and Walid G. Aref",
  title =        "The {RUM-tree}: supporting frequent updates in
                 {R-trees} using memos",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "719--738",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0120-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The problem of frequently updating multi-dimensional
                 indexes arises in many location-dependent applications.
                 While the R-tree and its variants are the dominant
                 choices for indexing multi-dimensional objects, the
                 R-tree exhibits inferior performance in the presence of
                 frequent updates. In this paper, we present an R-tree
                 variant, termed the RUM-tree (which stands for R-tree
                 with update memo) that reduces the cost of object
                 updates. The RUM-tree processes updates in a memo-based
                 approach that avoids disk accesses for purging old
                 entries during an update process. Therefore, the cost
                 of an update operation in the RUM-tree is reduced to
                 the cost of only an insert operation. The removal of
                 old object entries is carried out by a garbage cleaner
                 inside the RUM-tree. In this paper, we present the
                 details of the RUM-tree and study its properties. We
                 also address the issues of crash recovery and
                 concurrency control for the RUM-tree. Theoretical
                 analysis and comprehensive experimental evaluation
                 demonstrate that the RUM-tree outperforms other R-tree
                 variants by up to one order of magnitude in scenarios
                 with frequent updates.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Frequent updates; Indexing techniques; Performance;
                 Spatio-temporal databases",
}

@Article{Kriakov:2009:STM,
  author =       "Vassil Kriakov and George Kollios and Alex Delis",
  title =        "Self-tuning management of update-intensive
                 multidimensional data in clusters of workstations",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "739--764",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0121-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Contemporary applications continuously modify large
                 volumes of multidimensional data that must be accessed
                 efficiently and, more importantly, must be updated in a
                 timely manner. Single-server storage approaches are
                 insufficient when managing such volumes of data, while
                 the high frequency of data modification renders
                 classical indexing methods inefficient. To address
                 these two problems we introduce a distributed storage
                 manager for multidimensional data based on a
                 Cluster-of-Workstations. The manager addresses the
                 above challenges through a set of mechanisms that,
                 through selective on-line data reorganization,
                 collectively maintain a balanced load across a cluster
                 of workstations. With the help of both a highly
                 efficient and speedy self-tuning mechanism, based on a
                 new data structure called stat-index, and a
                 query aggregation and clustering algorithm, our storage
                 manager attains short query response times even in the
                 presence of massive modifications and highly skewed
                 access patterns. Furthermore, we provide a data
                 migration cost model used to determine the best data
                 redistribution strategy. Through extensive
                 experimentation with our prototype, we establish that
                 our storage manager can sustain significant update
                 rates with minimal overhead.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Cluster of workstations; Multi-dimensional data;
                 Self-tuning storage",
}

@Article{Cohen:2009:EQS,
  author =       "Sara Cohen",
  title =        "Equivalence of queries that are sensitive to
                 multiplicities",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "765--785",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0122-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The query equivalence problem has been studied
                 extensively for set-semantics and, more recently, for
                 bag and bag-set semantics. However, SQL queries often
                 combine set, bag and bag-set semantics. For example, an
                 SQL query that returns a multiset of elements may call
                 a subquery or view that returns a set of elements.
                 Queries may access both relations that do not contain
                 duplicates, as well as relations with duplicates. As
                 another example, in SQL one can compute a
                 multiset-union of queries, each of which returns a set
                 of answers. This paper presents combined semantics,
                 which formally models query evaluation combining set,
                 bag and bag-set semantics. The equivalence problem for
                 queries evaluated under combined semantics is studied.
                 A sufficient condition for equivalence is presented.
                 For several important common classes of queries,
                 necessary and sufficient conditions for equivalence are
                 presented.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Bag semantics; Combined semantics; Datalog; Query
                 equivalence; Set semantics",
}

@Article{Lian:2009:EPP,
  author =       "Xiang Lian and Lei Chen",
  title =        "Efficient processing of probabilistic reverse nearest
                 neighbor queries over uncertain data",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "787--808",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0123-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Reverse nearest neighbor (RNN) search is very crucial
                 in many real applications. In particular, given a
                 database and a query object, an RNN query retrieves all
                 the data objects in the database that have the query
                 object as their nearest neighbors. Often, due to
                 limitation of measurement devices, environmental
                 disturbance, or characteristics of applications (for
                 example, monitoring moving objects), data obtained from
                 the real world are uncertain (imprecise). Therefore,
                 previous approaches proposed for answering an RNN query
                 over an exact (precise) database cannot be directly
                 applied to the uncertain scenario. In this paper, we
                 re-define the RNN query in the context of uncertain
                 databases, namely probabilistic reverse nearest
                 neighbor (PRNN) query, which obtains data objects with
                 probabilities of being RNNs greater than or equal to a
                 user-specified threshold. Since the retrieval of a PRNN
                 query requires accessing all the objects in the
                 database, which is quite costly, we also propose an
                 effective pruning method, called geometric pruning
                 (GP), that significantly reduces the PRNN search space
                 yet without introducing any false dismissals.
                 Furthermore, we present an efficient PRNN query
                 procedure that seamlessly integrates our pruning
                 method. Extensive experiments have demonstrated the
                 efficiency and effectiveness of our proposed GP-based
                 PRNN query processing approach, under various
                 experimental settings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Geometric pruning; Probabilistic reverse nearest
                 neighbor; Uncertain databases",
}

@Article{Hua:2009:TTQ,
  author =       "Ming Hua and Jian Pei and Ada W. Fu and Xuemin Lin and
                 Ho-Fung Leung",
  title =        "Top-$k$ typicality queries and efficient query
                 answering methods on large databases",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "3",
  pages =        "809--835",
  month =        jun,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0128-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:55:19 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Finding typical instances is an effective approach to
                 understand and analyze large data sets. In this paper,
                 we apply the idea of typicality analysis from
                 psychology and cognitive science to database query
                 answering, and study the novel problem of answering
                 top-k typicality queries. We model typicality in large
                 data sets systematically. Three types of top-k
                 typicality queries are formulated. To answer questions
                 like 'Who are the top-k most typical NBA players?',
                 the measure of simple typicality is developed. To
                 answer questions like 'Who are the top-k most typical
                 guards distinguishing guards from other players?', the
                 notion of discriminative typicality is proposed.
                 Moreover, to answer questions like 'Who are the best k
                 typical guards in whole representing different types of
                 guards?', the notion of representative typicality is
                 used. Computing the exact answer to a top-k typicality
                 query requires quadratic time which is often too costly
                 for online query answering on large databases. We
                 develop a series of approximation methods for various
                 situations: (1) the randomized tournament algorithm has
                 linear complexity though it does not provide a
                 theoretical guarantee on the quality of the answers;
                 (2) the direct local typicality approximation using
                 VP-trees provides an approximation quality guarantee;
                 (3) a local typicality tree data structure can be
                 exploited to index a large set of objects. Then,
                 typicality queries can be answered efficiently with
                 quality guarantees by a tournament method based on a
                 Local Typicality Tree. An extensive performance study
                 using two real data sets and a series of synthetic data
                 sets clearly shows that top-k typicality queries are
                 meaningful and our methods are practical.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Efficient query answering; Top-k query; Typicality
                 analysis",
}

@Article{Bawa:2009:PPI,
  author =       "Mayank Bawa and Roberto J. {Bayardo, Jr.} and Rakesh
                 Agrawal and Jaideep Vaidya",
  title =        "Privacy-preserving indexing of documents on the
                 network",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "4",
  pages =        "837--856",
  month =        aug,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0129-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:56:20 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the ubiquitous collection of data and creation of
                 large distributed repositories, enabling search over
                 this data while respecting access control is critical.
                 A related problem is that of ensuring privacy of the
                 content owners while still maintaining an efficient
                 index of distributed content. We address the problem of
                 providing privacy-preserving search over distributed
                 access-controlled content. Indexed documents can be
                 easily reconstructed from conventional (inverted)
                 indexes used in search. Currently, the need to avoid
                 breaches of access-control through the index requires
                 the index hosting site to be fully secured and trusted
                 by all participating content providers. This level of
                 trust is impractical in the increasingly common case
                 where multiple competing organizations or individuals
                 wish to selectively share content. We propose a
                 solution that eliminates the need of such a trusted
                 authority. The solution builds a centralized
                 privacy-preserving index in conjunction with a
                 distributed access-control enforcing search protocol.
                 Two alternative methods to build the centralized index
                 are proposed, allowing trade-offs of efficiency and
                 security. The new index provides strong and
                 quantifiable privacy guarantees that hold even if the
                 entire index is made public. Experiments on a real-life
                 dataset validate the performance of the scheme. The appeal
                 of our solution is twofold: (a) content providers
                 maintain complete control in defining access groups and
                 ensuring its compliance, and (b) system implementors
                 retain tunable knobs to balance privacy and efficiency
                 concerns for their particular domains.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Distributed search; Indexing; Privacy",
}

@Article{Fan:2009:QTX,
  author =       "Wenfei Fan and Jeffrey Xu Yu and Jianzhong Li and
                 Bolin Ding and Lu Qin",
  title =        "Query translation from {XPath} to {SQL} in the
                 presence of recursive {DTDs}",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "4",
  pages =        "857--883",
  month =        aug,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-008-0131-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:56:20 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We study the problem of evaluating xpath queries over
                 xml data that is stored in an rdbms via schema-based
                 shredding. The interaction between recursion
                 (descendants-axis) in xpath queries and recursion in
                 dtds makes it challenging to answer xpath queries using
                 rdbms. We present a new approach to translating xpath
                 queries into sql queries based on a notion of
                 extended\par

                 XP ath expressions and a simple least fixpoint (lfp)
                 operator. Extended xpath expressions are a mild
                 extension of xpath, and the lfp operator takes a single
                 input relation and is already supported by most
                 commercial rdbms. We show that extended xpath
                 expressions are capable of capturing both dtd recursion
                 and xpath queries in a uniform framework. Furthermore,
                 they can be translated into an equivalent sequence of
                 sql queries with the lfp operator. We present
                 algorithms for rewriting xpath queries over a (possibly
                 recursive) dtd into extended xpath expressions and for
                 translating extended xpath expressions to sql queries,
                 as well as optimization techniques. The novelty of our
                 approach consists in its capability to answer a large
                 class of xpath queries by means of only low-end rdbms
                 features already available in most rdbms, as well as
                 its flexibility to accommodate existing relational
                 query optimization techniques. In addition, these
                 translation algorithms provide a solution to query
                 answering for certain (possibly recursive) xml views of
                 xml data. Our experimental results verify the
                 effectiveness of our techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Query translation; Recursive DTD; SQL; XML database;
                 XPath",
}

@Article{Malik:2009:RRA,
  author =       "Zaki Malik and Athman Bouguettaya",
  title =        "{RATEWeb}: {Reputation Assessment} for {Trust
                 Establishment} among {Web} services",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "4",
  pages =        "885--911",
  month =        aug,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0138-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:56:20 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We introduce RATEWeb, a framework for establishing
                 trust in service-oriented environments. RATEWeb
                 supports a cooperative model in which Web services
                 share their experiences of the service providers with
                 their peers through feedback ratings. The different
                 ratings are aggregated to derive a service provider's
                 reputation. This in turn is used to evaluate trust. The
                 overall goal of RATEWeb is to facilitate trust-based
                 selection and composition of Web services. We propose a
                 set of decentralized techniques that aim at accurately
                 aggregating the submitted ratings for reputation
                 assessment. We conduct experiments to assess the
                 fairness and accuracy of the proposed techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Reputation; Trust; Web service",
}

@Article{Wang:2009:CRE,
  author =       "Fusheng Wang and Shaorong Liu and Peiya Liu",
  title =        "Complex {RFID} event processing",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "4",
  pages =        "913--931",
  month =        aug,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0139-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:56:20 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Advances of sensor and radio frequency identification
                 (RFID) technology provide significant new power for
                 humans to sense, understand and manage the world. RFID
                 provides fast data collection with precise
                 identification of objects with unique IDs without line
                 of sight, thus it can be used for identifying,
                 locating, tracking and monitoring physical objects.
                 Despite these benefits, RFID poses many challenges for
                 data processing and management: (i) RFID observations
                 have implicit meanings, which have to be transformed
                 and aggregated into semantic data represented in their
                 data models; and (ii) RFID data are temporal,
                 streaming, and in high volume, and have to be processed
                 on the fly. Thus, a general RFID data processing
                 framework is needed to automate the transformation of
                 physical RFID observations into the virtual
                 counterparts in the virtual world linked to business
                 applications. In this paper, we take an event-oriented
                 approach to process RFID data, by devising RFID
                 application logic into complex events. We then
                 formalize the specification and semantics of RFID
                 events and rules. We discover that RFID events are
                 highly temporally constrained, and include
                 non-spontaneous events, and develop an RFID event
                 detection engine that can effectively process complex
                 RFID events. The declarative event-based approach
                 greatly simplifies the work of RFID data processing,
                 and can significantly reduce the cost of RFID data
                 integration.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Complex event; ECA rules; RFID; Temporal",
}

@Article{DuMouza:2009:LSI,
  author =       "C{\'e}dric {Du Mouza} and Witold Litwin and Philippe
                 Rigaux",
  title =        "Large-scale indexing of spatial data in distributed
                 repositories: the {SD}-Rtree",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "4",
  pages =        "933--958",
  month =        aug,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0135-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:56:20 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We propose a scalable distributed data structure
                 (SDDS) called SD-Rtree. We intend our structure for
                 point, window and kNN queries over large spatial
                 datasets distributed on clusters of interconnected
                 servers. The structure balances the storage and
                 processing load over the available resources, and aims
                 at minimizing the size of the cluster. SD-Rtree
                 generalizes the well-known Rtree structure. It uses a
                 distributed balanced binary tree that scales with
                 insertions to potentially any number of storage servers
                 through splits of the overloaded ones. A
                 user/application manipulates the structure from a
                 client node. The client addresses the tree through its
                 image, which may be outdated due to later splits.
                 This may generate addressing errors, which are resolved
                 by forwarding among the servers. Specific messages towards
                 the clients incrementally correct the outdated images.
                 We present the building of an SD-Rtree through
                 insertions, focusing on the split and rotation
                 algorithms, and follow with the query algorithms. We
                 then describe a flexible allocation protocol which
                 allows the structure to cope with a temporary shortage of storage
                 resources through data storage balancing. Experiments
                 show additional aspects of SD-Rtree and compare its
                 behavior with a distributed quadtree. The results
                 justify our various design choices and the overall
                 utility of the structure.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Distributed structure; Spatial indexing",
}

@Article{Zheng:2009:DSI,
  author =       "Baihua Zheng and Wang-Chien Lee and Ken C. Lee and Dik
                 Lun Lee and Min Shao",
  title =        "A distributed spatial index for error-prone wireless
                 data broadcast",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "4",
  pages =        "959--986",
  month =        aug,
  year =         "2009",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0137-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Sep 15 14:56:20 MDT 2009",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Information is valuable to users when it is available
                 not only at the right time but also at the right place.
                 To support efficient location-based data access in
                 wireless data broadcast systems, a distributed spatial
                 index (called DSI) is presented in this paper. DSI is
                 highly efficient because it has a linear yet fully
                 distributed structure that naturally shares links in
                 different search paths. DSI is very resilient to the
                 error-prone wireless communication environment because
                 interrupted search operations based on DSI can be
                 resumed easily. It supports search algorithms for
                 classical location-based queries such as window queries
                 and kNN queries in both the snapshot and continuous
                 query modes. In-depth analysis and simulation-based
                 evaluation have been conducted. The results show that
                 DSI significantly outperforms a variant of R-trees
                 tailored for wireless data broadcast environments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Error resilience; Location-based query; Mobile
                 computing; Wireless broadcast",
}

@Article{Haas:2009:SIU,
  author =       "Peter J. Haas and Dan Suciu",
  title =        "Special issue on uncertain and probabilistic
                 databases",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "987--988",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sarma:2009:RUD,
  author =       "Anish Das Sarma and Omar Benjelloun and Alon Halevy
                 and Shubha Nabar and Jennifer Widom",
  title =        "Representing uncertain data: models, properties, and
                 algorithms",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "989--1019",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Antova:2009:WBE,
  author =       "Lyublena Antova and Christoph Koch and Dan Olteanu",
  title =        "$10^{(10^{6})}$ worlds and beyond: efficient
                 representation and processing of incomplete
                 information",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1021--1040",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Abiteboul:2009:EPX,
  author =       "Serge Abiteboul and Benny Kimelfeld and Yehoshua Sagiv
                 and Pierre Senellart",
  title =        "On the expressiveness of probabilistic {XML} models",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1041--1064",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sen:2009:PME,
  author =       "Prithviraj Sen and Amol Deshpande and Lise Getoor",
  title =        "{PrDB}: managing and exploiting rich correlations in
                 probabilistic databases",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1065--1090",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Re:2009:THQ,
  author =       "Christopher R{\'e} and Dan Suciu",
  title =        "The trichotomy of {HAVING} queries on a probabilistic
                 database",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1091--1116",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kimelfeld:2009:QEP,
  author =       "Benny Kimelfeld and Yuri Kosharovsky and Yehoshua
                 Sagiv",
  title =        "Query evaluation over probabilistic {XML}",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1117--1140",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hassanzadeh:2009:CPD,
  author =       "Oktie Hassanzadeh and Ren{\'e}e J. Miller",
  title =        "Creating probabilistic databases from duplicated
                 data",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1141--1166",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wolf:2009:QPI,
  author =       "Garrett Wolf and Aravind Kalavagattu and Hemal Khatri
                 and Raju Balakrishnan and Bhaumik Chokshi and Jianchun
                 Fan and Yi Chen and Subbarao Kambhampati",
  title =        "Query processing over incomplete autonomous databases:
                 query rewriting using learned data dependencies",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1167--1190",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Keulen:2009:QEK,
  author =       "Maurice van Keulen and Ander de Keijzer",
  title =        "Qualitative effects of knowledge rules and user
                 feedback in probabilistic data integration",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1191--1217",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2009:SPS,
  author =       "Jinchuan Chen and Reynold Cheng and Mohamed Mokbel and
                 Chi-Yin Chow",
  title =        "Scalable processing of snapshot and continuous
                 nearest-neighbor queries over one-dimensional uncertain
                 data",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "5",
  pages =        "1219--1240",
  month =        oct,
  year =         "2009",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:40 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2010:TFD,
  author =       "Keke Chen and Ling Liu",
  title =        "{HE-Tree}: a framework for detecting changes in
                 clustering structure for categorical data streams",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1241--1260",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Whang:2010:GER,
  author =       "Steven Euijong Whang and Omar Benjelloun and Hector
                 Garcia-Molina",
  title =        "Generic entity resolution with negative rules",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1261--1277",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ntarmos:2010:SSI,
  author =       "Nikos Ntarmos and Peter Triantafillou and Gerhard
                 Weikum",
  title =        "Statistical structures for {Internet}-scale data
                 management",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1279--1312",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bramandia:2010:OUR,
  author =       "Ramadhana Bramandia and Jiefeng Cheng and Byron Choi
                 and Jeffrey Xu Yu",
  title =        "Optimizing updates of recursive {XML} views of
                 relations",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1313--1333",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Duntgen:2010:BBM,
  author =       "Christian D{\"u}ntgen and Thomas Behr and Ralf Hartmut
                 G{\"u}ting",
  title =        "{BerlinMOD}: a benchmark for moving object databases",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1335--1368",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mandreoli:2010:PHS,
  author =       "Federica Mandreoli and Riccardo Martoglia and Pavel
                 Zezula",
  title =        "Principles of {Holism} for sequential twig pattern
                 matching",
  journal =      j-VLDB-J,
  volume =       "18",
  number =       "6",
  pages =        "1369--1392",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:44 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Buneman:2010:SIB,
  author =       "Peter Buneman and Volker Markl and Beng Chin Ooi and
                 Kenneth Ross",
  title =        "Special issue: best papers of {VLDB 2008}",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cormode:2010:MFF,
  author =       "Graham Cormode and Marios Hadjieleftheriou",
  title =        "Methods for finding frequent items in data streams",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "3--20",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bruno:2010:CPD,
  author =       "Nicolas Bruno and Surajit Chaudhuri",
  title =        "Constrained physical design tuning",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "21--44",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lizorkin:2010:AEO,
  author =       "Dmitry Lizorkin and Pavel Velikhov and Maxim Grinev
                 and Denis Turdakov",
  title =        "Accuracy estimate and optimization techniques for
                 {SimRank} computation",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "45--66",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Nath:2010:OMV,
  author =       "Suman Nath and Phillip B. Gibbons",
  title =        "Online maintenance of very large random samples on
                 flash storage",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "67--90",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Neumann:2010:RES,
  author =       "Thomas Neumann and Gerhard Weikum",
  title =        "The {RDF-3X} engine for scalable management of {RDF}
                 data",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "91--113",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cormode:2010:ABG,
  author =       "Graham Cormode and Divesh Srivastava and Ting Yu and
                 Qing Zhang",
  title =        "Anonymizing bipartite graph data using safe
                 groupings",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "1",
  pages =        "115--139",
  month =        feb,
  year =         "2010",
  CODEN =        "VLDBFR",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 16 08:21:46 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{U:2010:CSA,
  author =       "Leong Hou U. and Kyriakos Mouratidis and Nikos
                 Mamoulis",
  title =        "Continuous spatial assignment of moving users",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "141--160",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0144-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Consider a set of servers and a set of users, where
                 each server has a coverage region (i.e., an area of
                 service) and a capacity (i.e., a maximum number of
                 users it can serve). Our task is to assign every user
                 to one server subject to the coverage and capacity
                 constraints. To offer the highest quality of service,
                 we wish to minimize the average distance between users
                 and their assigned server. This is an instance of a
                 well-studied problem in operations research, termed
                 optimal assignment. Even though there exist several
                 solutions for the static case (where user locations are
                 fixed), there is currently no method for dynamic
                 settings. In this paper, we consider the continuous
                 assignment problem (CAP), where an optimal assignment
                 must be constantly maintained between mobile users and
                 a set of servers. The fact that the users are mobile
                 necessitates real-time reassignment so that the quality
                 of service remains high (i.e., their distance from
                 their assigned servers is minimized). The large scale
                 and the time-critical nature of targeted applications
                 require fast CAP solutions. We propose an algorithm
                 that utilizes the geometric characteristics of the
                 problem and significantly accelerates the initial
                 assignment computation and its subsequent maintenance.
                 Our method applies to different cost functions (e.g.,
                 average squared distance) and to any Minkowski distance
                 metric (e.g., Euclidean, L$_1$ norm, etc.).",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Continuous query; Optimal assignment; Spatial
                 monitoring",
}
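
%%% The abstract above reduces quality of service to a capacitated
%%% optimal-assignment problem.  A minimal sketch of the static case
%%% only (not the paper's continuous CAP maintenance): each server is
%%% replicated once per unit of capacity and the resulting rectangular
%%% matching is solved with the Hungarian method.  The function name
%%% and the NumPy/SciPy dependency are illustrative assumptions.
%%%
%%%     import numpy as np
%%%     from scipy.optimize import linear_sum_assignment
%%%
%%%     def assign_users(users, servers, capacities):
%%%         # users: (n, 2) array; servers: (m, 2) array;
%%%         # capacities: list of m ints, expanded into server slots.
%%%         slots = [i for i, c in enumerate(capacities)
%%%                  for _ in range(c)]
%%%         # Cost = Euclidean distance from each user to each slot.
%%%         cost = np.linalg.norm(
%%%             users[:, None, :] - servers[np.array(slots)][None, :, :],
%%%             axis=2)
%%%         rows, cols = linear_sum_assignment(cost)  # Hungarian method
%%%         return {int(u): slots[c] for u, c in zip(rows, cols)}
%%%
%%%     users = np.array([[0.0, 0.0], [1.0, 1.0], [5.0, 5.0]])
%%%     servers = np.array([[0.0, 1.0], [5.0, 4.0]])
%%%     print(assign_users(users, servers, capacities=[2, 1]))
%%%     # {0: 0, 1: 0, 2: 1}: the two nearby users share server 0.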

@Article{Papadopoulos:2010:CAR,
  author =       "Stavros Papadopoulos and Yin Yang and Dimitris
                 Papadias",
  title =        "Continuous authentication on relational streams",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "161--180",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0145-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "According to the database outsourcing model, a data
                 owner delegates database functionality to a third-party
                 service provider, which answers queries received from
                 clients. Authenticated query processing enables the
                 clients to verify the correctness of query results.
                 Despite the abundance of methods for authenticated
                 processing in conventional databases, there is limited
                 work on outsourced data streams. Stream environments
                 pose new challenges such as the need for fast structure
                 updating, support for continuous query processing and
                 authentication, and provision for temporal
                 completeness. Specifically, in addition to the
                 correctness of individual results, the client must be
                 able to verify that there are no missing results in
                 between data updates. This paper presents a
                 comprehensive set of methods covering relational
                 streams. We first describe REF, a technique that
                 achieves correctness and temporal completeness but
                 incurs false transmissions, i.e., the provider has to
                 inform the clients whenever there is a data update,
                 even if their results are not affected. Then, we
                 propose CADS, which minimizes the processing and
                 transmission overhead through an elaborate indexing
                 scheme and a virtual caching mechanism. In addition, we
                 present an analytical study to determine the optimal
                 indexing granularity, and extend CADS for the case that
                 the data distribution changes over time. Finally, we
                 evaluate the effectiveness of our techniques through
                 extensive experiments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Authentication; Continuous monitoring; Data streams;
                 Database outsourcing",
}
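
%%% The notion of temporal completeness above (no missing results in
%%% between updates) can be illustrated with a toy check: the owner
%%% tags every update with a running sequence number and a keyed MAC,
%%% and the client verifies both the MACs and the gap-free numbering.
%%% This is only a sketch of the concept, not the REF or CADS schemes;
%%% the key and function names are assumptions.
%%%
%%%     import hmac, hashlib
%%%
%%%     KEY = b"owner-client-shared-key"  # assumed pre-shared secret
%%%
%%%     def tag(seq, payload):
%%%         mac = hmac.new(KEY, f"{seq}|{payload}".encode(),
%%%                        hashlib.sha256).hexdigest()
%%%         return seq, payload, mac
%%%
%%%     def verify(stream):
%%%         # Rejects forged updates (bad MAC) and missing updates
%%%         # (gap in the sequence), i.e., temporal incompleteness.
%%%         for expected, (seq, payload, mac) in enumerate(stream):
%%%             if seq != expected:
%%%                 return False
%%%             if not hmac.compare_digest(mac, tag(seq, payload)[2]):
%%%                 return False
%%%         return True
%%%
%%%     updates = [tag(i, f"result-{i}") for i in range(3)]
%%%     print(verify(updates))                    # True
%%%     print(verify(updates[:1] + updates[2:]))  # False: update 1 lost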

@Article{Zhang:2010:UMS,
  author =       "Zhenjie Zhang and Hua Lu and Beng Chin Ooi and Anthony
                 K. Tung",
  title =        "Understanding the meaning of a shifted sky: a general
                 framework on extending skyline query",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "181--201",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0148-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Skyline queries are often used on data sets in
                 multi-dimensional space for many decision-making
                 applications. Traditionally, an object p is said to
                 dominate another object q if, for all dimensions, it is
                 no worse than q and is better on at least one
                 dimension. Therefore, the skyline of a data set
                 consists of all objects not dominated by any other
                 object. To better cater to application requirements
                 such as controlling the size of the skyline or handling
                 data sets that are not well-structured, various works
                 have been proposed to extend the definition of skyline
                 based on variants of the dominance relationship. In
                 view of the proliferation of variants, in this paper, a
                 generalized framework is proposed to guide the
                 extension of skyline query from conventional definition
                 to different variants. Our framework explicitly and
                 carefully examines the various properties that should
                 be preserved in a variant of the dominance relationship
                 so that: (1) maintaining original advantages, while
                 extending adaptivity to application semantics, and (2)
                 keeping computational complexity almost unaffected. We
                 prove that traditional dominance is the only
                 relationship satisfying all desirable properties, and
                 present some new dominance relationships by relaxing
                 some of the properties. These relationships are general
                 enough for us to design new top-$k$ skyline queries that
                 return robust results of a controllable size. We
                 analyze the existing skyline algorithms based on their
                 minimum requirements on dominance properties. We also
                 extend our analysis to data sets with missing values,
                 and present extensive experimental results on the
                 combinations of new dominance relationships and skyline
                 algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "General framework; Skyline query",
}
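
%%% The traditional dominance relationship quoted in the abstract
%%% translates directly into code.  A naive quadratic sketch, assuming
%%% smaller values are better on every dimension (the usual
%%% minimization convention, an assumption of this illustration):
%%%
%%%     def dominates(p, q):
%%%         # p dominates q: no worse on all dimensions and strictly
%%%         # better on at least one.
%%%         return (all(a <= b for a, b in zip(p, q))
%%%                 and any(a < b for a, b in zip(p, q)))
%%%
%%%     def skyline(points):
%%%         # Keep every point not dominated by any other point.
%%%         return [p for p in points
%%%                 if not any(dominates(q, p) for q in points if q != p)]
%%%
%%%     pts = [(1, 9), (3, 3), (4, 8), (5, 5), (9, 1)]
%%%     print(skyline(pts))   # [(1, 9), (3, 3), (9, 1)]
%%%
%%% The paper's variants are obtained by relaxing properties of exactly
%%% this relationship, i.e., by swapping in a different dominates().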

@Article{Lo:2010:FTD,
  author =       "Eric Lo and Carsten Binnig and Donald Kossmann and M.
                 Tamer {\"O}zsu and Wing-Kai Hon",
  title =        "A framework for testing {DBMS} features",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "203--230",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0157-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Testing a specific feature of a DBMS requires
                 controlling the inputs and outputs of the operators in
                 the query execution plan. However, that is practically
                 difficult to achieve because the inputs/outputs of a
                 query depend on the content of the test database. In
                 this paper, we propose a framework to test DBMS
                 features. The framework includes a database generator
                 called QAGen so that the generated test databases are
                 able to meet the test requirements defined on the test
                 queries. The framework also includes a set of tools to
                 automate test case constructions and test executions. A
                 wide range of DBMS feature testing tasks can be
                 facilitated by the proposed framework.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data generation; Database testing; Symbolic execution;
                 Symbolic query processing",
}

@Article{Bonifati:2010:SMQ,
  author =       "Angela Bonifati and Elaine Chang and Terence Ho and
                 Laks V. Lakshmanan and Rachel Pottinger and Yongik
                 Chung",
  title =        "Schema mapping and query translation in heterogeneous
                 {P2P XML} databases",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "231--256",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0159-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Peers in a peer-to-peer data management system often
                 have heterogeneous schemas and no mediated global
                 schema. To translate queries across peers, we assume
                 each peer provides correspondences between its schema
                 and a small number of other peer schemas. We focus on
                 query reformulation in the presence of heterogeneous
                 XML schemas, including data--metadata conflicts. We
                 develop an algorithm for inferring precise mapping
                 rules from informal schema correspondences. We define
                 the semantics of query answering in this setting and
                 develop a query translation algorithm. Our translation
                 handles an expressive fragment of XQuery and works both
                 along and against the direction of mapping rules. We
                 describe the HePToX heterogeneous P2P XML data
                 management system, which incorporates our results. We
                 report the results of extensive experiments on HePToX
                 on both synthetic and real datasets. We demonstrate the
                 system's utility and scalability on different P2P
                 distributions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Heterogeneous Peer-to-Peer XML databases; Schema
                 mapping; XML query translation",
}

@Article{Morfonios:2010:RCL,
  author =       "Konstantinos Morfonios and Yannis Ioannidis",
  title =        "Revisiting the cube lifecycle in the presence of
                 hierarchies",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "257--282",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0160-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "On-line analytical processing (OLAP) typically
                 involves complex aggregate queries over large datasets.
                 The data cube has been proposed as a structure that
                 materializes the results of such queries in order to
                 accelerate OLAP. A significant fraction of the related
                 work has been on Relational-OLAP (ROLAP) techniques,
                 which are based on relational technology. Existing
                 ROLAP cubing solutions mainly focus on 'flat' datasets,
                 which do not include hierarchies in their dimensions.
                 Nevertheless, as shown in this paper, the nature of
                 hierarchies introduces several complications into the
                 entire lifecycle of a data cube including the
                 operations of construction, storage, indexing, query
                 processing, and incremental maintenance. This fact
                 renders existing techniques essentially inapplicable in
                 a significant number of real-world applications and
                 mandates revisiting the entire cube lifecycle under the
                 new perspective. In order to overcome this problem, the
                 CURE algorithm has been recently proposed as an
                 efficient mechanism to construct complete cubes over
                 large datasets with arbitrary hierarchies and store
                 them in a highly compressed format, compatible with the
                 relational model. In this paper, we study the remaining
                 phases in the cube lifecycle and introduce
                 query-processing and incremental-maintenance algorithms
                 for CURE cubes. These are significantly different from
                 earlier approaches, which have been proposed for flat
                 cubes constructed by other techniques and are
                 inadequate for CURE due to its high compression rate
                 and the presence of hierarchies. Our methods address
                 issues such as cube indexing, query optimization, and
                 lazy update policies. Especially regarding updates,
                 such lazy approaches are applied for the first time on
                 cubes. We demonstrate the effectiveness of CURE in all
                 phases of the cube lifecycle through experiments on
                 both real-world and synthetic datasets. Among the
                 experimental results, we distinguish those that have
                 made CURE the first ROLAP technique to complete the
                 construction and usage of the cube of the
                 highest-density dataset in the APB-1 benchmark (12 GB).
                 CURE was in fact quite efficient on this, showing great
                 promise with respect to the potential of the technique
                 overall.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data cube; Incremental maintenance; Lazy update; Query
                 processing",
}

@Article{Zhang:2010:TBP,
  author =       "Wenjie Zhang and Xuemin Lin and Ying Zhang and Jian
                 Pei and Wei Wang",
  title =        "Threshold-based probabilistic top-$k$ dominating
                 queries",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "2",
  pages =        "283--305",
  month =        apr,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0162-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 21 16:41:50 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Recently, due to intrinsic characteristics in many
                 underlying data sets, a number of probabilistic queries
                 on uncertain data have been investigated. Top-$k$
                 dominating queries are very important in many
                 applications including decision making in a
                 multidimensional space. In this paper, we study the
                 problem of efficiently computing top-$k$ dominating
                 queries on uncertain data. We first formally define the
                 problem. Then, we develop an efficient, threshold-based
                 algorithm to compute the exact solution. To overcome
                 some inherent computational deficiency in an exact
                 computation, we develop an efficient randomized
                 algorithm with an accuracy guarantee. Our extensive
                 experiments demonstrate that both algorithms are quite
                 efficient, while the randomized algorithm is quite
                 scalable against data set sizes, object areas, $k$
                 values, etc. The randomized algorithm is also highly
                 accurate in practice.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Dominating relation; Top $k$; Uncertain objects",
}
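
%%% The randomized algorithm mentioned above can be caricatured by
%%% plain Monte Carlo over possible worlds: sample an instance of every
%%% uncertain object, count how many objects each one dominates, and
%%% rank by the averaged score.  This sketch has none of the paper's
%%% threshold-based pruning or accuracy guarantees; the object format
%%% (discrete instances with probabilities) is an assumption.
%%%
%%%     import random
%%%
%%%     def dominates(p, q):
%%%         return all(a <= b for a, b in zip(p, q)) and p != q
%%%
%%%     def topk_dominating(objects, k, trials=2000, seed=0):
%%%         # objects: one [(point, prob), ...] list per object.
%%%         rng = random.Random(seed)
%%%         score = [0.0] * len(objects)
%%%         for _ in range(trials):
%%%             world = [rng.choices([p for p, _ in o],
%%%                                  [w for _, w in o])[0]
%%%                      for o in objects]
%%%             for i, p in enumerate(world):
%%%                 score[i] += sum(dominates(p, q)
%%%                                 for j, q in enumerate(world) if j != i)
%%%         return sorted(range(len(objects)),
%%%                       key=lambda i: -score[i] / trials)[:k]
%%%
%%%     objs = [[((1, 1), 0.2), ((8, 8), 0.8)],
%%%             [((2, 2), 1.0)],
%%%             [((9, 9), 1.0)]]
%%%     print(topk_dominating(objs, k=1))  # [1]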

@Article{Nutanong:2010:AEV,
  author =       "Sarana Nutanong and Rui Zhang and Egemen Tanin and
                 Lars Kulik",
  title =        "Analysis and evaluation of {V*-kNN}: an efficient
                 algorithm for moving {kNN} queries",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "3",
  pages =        "307--332",
  month =        jun,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0163-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:05:52 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The moving $k$ nearest neighbor (M k NN) query
                 continuously finds the $k$ nearest neighbors of a
                 moving query point. M k NN queries can be efficiently
                 processed through the use of safe regions. In general,
                 a safe region is a region within which the query point
                 can move without changing the query answer. This paper
                 presents an incremental safe-region-based technique for
                 answering M k NN queries, called the V*-Diagram, as
                 well as analysis and evaluation of its associated
                 algorithm, V*-kNN. Traditional safe-region approaches
                 compute a safe region based on the data objects but
                 independent of the query location. Our approach
                 exploits the knowledge of the query location and the
                 boundary of the search space in addition to the data
                 objects. As a result, V*-kNN has much smaller I/O and
                 computation costs than existing methods. We further
                 provide cost models to estimate the number of data
                 accesses for V*-kNN and a competitive technique,
                 RIS-kNN. The V*-Diagram and V*-kNN are also applicable
                 to the domain of spatial networks and we present
                 algorithms to construct a spatial-network V*-Diagram.
                 Our experimental results show that V*-kNN significantly
                 outperforms the competitive technique. The results also
                 verify the accuracy of the cost models.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Nearest neighbor search; Spatial databases",
}
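
%%% The safe-region idea summarized above admits a very small
%%% conservative illustration (much weaker than the V*-Diagram): by the
%%% triangle inequality, the current kNN answer remains valid while the
%%% query point moves less than half the gap between the k-th and
%%% (k+1)-th neighbor distances.  Function names are assumptions.
%%%
%%%     import math
%%%
%%%     def safe_radius(points, q, k):
%%%         # If the query moves by delta < (d_{k+1} - d_k) / 2, every
%%%         # current neighbor is still closer than every outside
%%%         # point, so no recomputation is needed.
%%%         d = sorted(math.dist(p, q) for p in points)
%%%         return (d[k] - d[k - 1]) / 2
%%%
%%%     pts = [(0, 0), (2, 0), (5, 0), (9, 0)]
%%%     print(safe_radius(pts, q=(1, 0), k=2))  # (4 - 1) / 2 = 1.5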

@Article{Lee:2010:ZSE,
  author =       "Ken C. Lee and Wang-Chien Lee and Baihua Zheng and
                 Huajing Li and Yuan Tian",
  title =        "{Z-SKY}: an efficient skyline query processing
                 framework based on {Z}-order",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "3",
  pages =        "333--362",
  month =        jun,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0166-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:05:52 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a set of data points in a multidimensional
                 space, a skyline query retrieves those data points that
                 are not dominated by any other point in the same
                 dataset. Observing that the properties of Z-order space
                 filling curves (or Z-order curves) perfectly match with
                 the dominance relationships among data points in a
                 geometrical data space, we, in this paper, develop and
                 present a novel and efficient processing framework to
                 evaluate skyline queries and their variants, and to
                 support skyline result updates based on Z-order curves.
                 This framework consists of ZBtree, i.e., an index
                 structure to organize a source dataset and skyline
                 candidates, and a suite of algorithms, namely, (1)
                 ZSearch, which processes skyline queries, (2) ZInsert,
                 ZDelete and ZUpdate, which incrementally maintain
                 skyline results in presence of source dataset updates,
                 (3) ZBand, which answers skyband queries, (4) ZRank,
                 which returns top-ranked skyline points, (5) $k$-ZSearch,
                 which evaluates $k$-dominant skyline queries, and (6)
                 ZSubspace, which supports skyline queries on a subset
                 of dimensions. While derived from coherent ideas and
                 concepts, our approaches are shown through
                 comprehensive experiments to outperform the
                 state-of-the-art algorithms that are specialized to
                 address particular skyline problems, especially when a
                 large number of skyline points are returned.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Index; Search algorithm; Skyline query; Skyline query
                 result update; Z-order space filling curve",
}
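
%%% The observation driving Z-SKY is that a point's position on the
%%% Z-order curve is its Morton code: the interleaved bits of its
%%% coordinates.  A minimal sketch of that key alone (the ZBtree index
%%% machinery and the Z* algorithms are far beyond this illustration):
%%%
%%%     def z_order(coords, bits=8):
%%%         # Interleave the bits of each coordinate, most significant
%%%         # bit first; sorting by this key walks the Z-order curve.
%%%         z = 0
%%%         for bit in range(bits - 1, -1, -1):
%%%             for c in coords:
%%%                 z = (z << 1) | ((c >> bit) & 1)
%%%         return z
%%%
%%%     pts = [(1, 9), (3, 3), (9, 1)]
%%%     for p in sorted(pts, key=z_order):
%%%         print(p, z_order(p))   # (3, 3) 15 / (1, 9) 67 / (9, 1) 131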

@Article{Yiu:2010:ESS,
  author =       "Man Lung Yiu and Gabriel Ghinita and Christian S.
                 Jensen and Panos Kalnis",
  title =        "Enabling search services on outsourced private spatial
                 data",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "3",
  pages =        "363--384",
  month =        jun,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0169-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:05:52 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Cloud computing services enable organizations and
                 individuals to outsource the management of their data
                 to a service provider in order to save on hardware
                 investments and reduce maintenance costs. Only
                 authorized users are allowed to access the data. Nobody
                 else, including the service provider, should be able to
                 view the data. For instance, a real-estate company that
                 owns a large database of properties wants to allow its
                 paying customers to query for houses according to
                 location. On the other hand, the untrusted service
                 provider should not be able to learn the property
                 locations and, e.g., sell the information to a
                 competitor. To tackle the problem, we propose to
                 transform the location datasets before uploading them
                 to the service provider. The paper develops a spatial
                 transformation that re-distributes the locations in
                 space, and it also proposes a cryptographic-based
                 transformation. The data owner selects the
                 transformation key and shares it with authorized users.
                 Without the key, it is infeasible to reconstruct the
                 original data points from the transformed points. The
                 proposed transformations present distinct trade-offs
                 between query efficiency and data confidentiality. In
                 addition, we describe attack models for studying the
                 security properties of the transformations. Empirical
                 studies demonstrate that the proposed methods are
                 efficient and applicable in practice.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data outsourcing; Spatial query processing",
}
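
%%% A toy version of the key-based idea above: derive a secret rotation
%%% and translation from the owner's key, upload only transformed
%%% points, and let authorized users invert the transform.  Because an
%%% isometry preserves distances, range and kNN search still work on
%%% the transformed data.  This is purely illustrative; the paper's
%%% transformations and their security analysis are substantially
%%% stronger, and all names here are assumptions.
%%%
%%%     import math, random
%%%
%%%     def make_key(secret):
%%%         rng = random.Random(secret)
%%%         return (rng.uniform(0, 2 * math.pi),   # rotation angle
%%%                 rng.uniform(-100, 100),        # x translation
%%%                 rng.uniform(-100, 100))        # y translation
%%%
%%%     def transform(p, key):
%%%         t, dx, dy = key
%%%         x, y = p
%%%         return (x * math.cos(t) - y * math.sin(t) + dx,
%%%                 x * math.sin(t) + y * math.cos(t) + dy)
%%%
%%%     def invert(p, key):
%%%         t, dx, dy = key
%%%         x, y = p[0] - dx, p[1] - dy
%%%         return (x * math.cos(t) + y * math.sin(t),
%%%                 -x * math.sin(t) + y * math.cos(t))
%%%
%%%     key = make_key("owner-secret")
%%%     print(invert(transform((12.5, 7.0), key), key))  # ~(12.5, 7.0)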

@Article{Hintoglu:2010:SMP,
  author =       "Ay{\c{c}}a Azgin Hintoglu and Y{\"u}cel Sayg{\i}n",
  title =        "Suppressing microdata to prevent classification based
                 inference",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "3",
  pages =        "385--410",
  month =        jun,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0170-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:05:52 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The revolution of the Internet together with the
                 progression in computer technology makes it easy for
                 institutions to collect an unprecedented amount of
                 personal data. This pervasive data collection rally
                 coupled with the increasing necessity of dissemination
                 and sharing of non-aggregated data, i.e., microdata,
                 raised a lot of concerns about privacy. One method to
                 ensure privacy is to selectively hide the confidential,
                 i.e., sensitive, information before disclosure. However,
                 with data mining techniques, it is now possible for an
                 adversary to predict the hidden confidential
                 information from the disclosed data sets. In this
                 paper, we concentrate on one such data mining technique
                 called classification. We extend our previous work on
                 microdata suppression to prevent both probabilistic and
                 decision tree classification based inference. We also
                 provide experimental results showing the effectiveness
                 of not only the proposed methods but also the hybrid
                 methods, i.e., methods suppressing microdata against
                 both classification models, on real-life data sets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Data mining; Data perturbation; Data suppression;
                 Disclosure protection; Privacy",
}

@Article{Jin:2010:SWT,
  author =       "Cheqing Jin and Ke Yi and Lei Chen and Jeffrey Xu Yu
                 and Xuemin Lin",
  title =        "Sliding-window top-$k$ queries on uncertain streams",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "3",
  pages =        "411--435",
  month =        jun,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0171-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:05:52 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Recently, due to the imprecise nature of the data
                 generated from a variety of streaming applications,
                 such as sensor networks, query processing on uncertain
                 data streams has become an important problem. However,
                 all the existing works on uncertain data streams study
                 unbounded streams. In this paper, we take the first
                 step towards the important and challenging problem of
                 answering sliding-window queries on uncertain data
                 streams, with a focus on one of the most important
                 types of queries: top-$k$ queries. It is nontrivial to
                 find an efficient solution for answering sliding-window
                 top-$k$ queries on uncertain data streams, because
                 challenges not only stem from the strict space and time
                 requirements of processing both arriving and expiring
                 tuples in high-speed streams, but also arise from the
                 exponential blowup in the number of possible worlds
                 induced by the uncertain data model. In this paper, we
                 design a unified framework for processing
                 sliding-window top-$k$ queries on uncertain streams. We
                 show that all the existing top-$k$ definitions in the
                 literature can be plugged into our framework, resulting
                 in several succinct synopses that use space much
                 smaller than the window size, while they are also
                 highly efficient in terms of processing time. We also
                 extend our framework to answering multiple top-$k$
                 queries. In addition to the theoretical space and time
                 bounds that we prove for these synopses, we present a
                 thorough experimental report to verify their practical
                 efficiency on both synthetic and real data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Sliding-window; Top-k query; Uncertain stream",
}
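
%%% As a point of reference for the synopses described above, the
%%% deterministic sliding-window top-k baseline is tiny: buffer the
%%% last w tuples and rank them after each arrival.  The paper's
%%% contribution is precisely to avoid this full-window buffering and
%%% to handle uncertain tuples, neither of which this sketch does.
%%%
%%%     from collections import deque
%%%     import heapq
%%%
%%%     def sliding_topk(stream, w, k):
%%%         window = deque(maxlen=w)   # expiring tuples drop off
%%%         for score in stream:
%%%             window.append(score)   # arriving tuple
%%%             yield heapq.nlargest(k, window)
%%%
%%%     for top in sliding_topk([5, 1, 9, 3, 7, 2], w=3, k=2):
%%%         print(top)   # [5] [5, 1] [9, 5] [9, 3] [9, 7] [7, 3]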

@Article{Pang:2010:EPE,
  author =       "Hweehwa Pang and Xuhua Ding and Baihua Zheng",
  title =        "Efficient processing of exact top-$k$ queries over
                 disk-resident sorted lists",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "3",
  pages =        "437--456",
  month =        jun,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0174-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:05:52 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The top- k query is employed in a wide range of
                 applications to generate a ranked list of data that
                 have the highest aggregate scores over certain
                 attributes. As the pool of attributes for selection by
                 individual queries may be large, the data are indexed
                 with per-attribute sorted lists, and a threshold
                 algorithm (TA) is applied on the lists involved in each
                 query. The TA executes in two phases: find a cut-off
                 threshold for the top-$k$ result scores, then evaluate
                 all the records that could score above the threshold.
                 In this paper, we focus on exact top-$k$ queries that
                 involve monotonic linear scoring functions over
                 disk-resident sorted lists. We introduce a model for
                 estimating the depths to which each sorted list needs
                 to be processed in the two phases, so that (most of)
                 the required records can be fetched efficiently through
                 sequential or batched I/Os. We also devise a mechanism
                 to quickly rank the data that qualify for the query
                 answer and to eliminate those that do not, in order to
                 reduce the computation demand of the query processor.
                 Extensive experiments with four different datasets
                 confirm that our schemes achieve substantial
                 performance speed-up of between two times and two
                 orders of magnitude over existing TAs, at the expense
                 of a memory overhead of 4.8 bits per attribute value.
                 Moreover, our scheme is robust to different data
                 distributions and query characteristics.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Bloom filter; Threshold algorithm; Top-k query
                 processing",
}
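
%%% The two-phase TA described above descends per-attribute sorted
%%% lists until the best k scores reach the threshold of the last
%%% values seen.  A minimal in-memory sketch for a summed monotone
%%% scoring function (the paper's depth estimation and batched I/O are
%%% not modeled; names are assumptions):
%%%
%%%     import heapq
%%%
%%%     def threshold_algorithm(lists, k):
%%%         # lists: one [(oid, value), ...] per attribute, sorted by
%%%         # value descending; score(o) = sum of o's attribute values.
%%%         lookup = [dict(l) for l in lists]   # random access
%%%         seen, topk = set(), []              # topk: min-heap
%%%         for depth in range(max(len(l) for l in lists)):
%%%             threshold = 0.0
%%%             for l in lists:
%%%                 if depth >= len(l):
%%%                     continue
%%%                 oid, val = l[depth]         # sorted access
%%%                 threshold += val
%%%                 if oid not in seen:
%%%                     seen.add(oid)
%%%                     score = sum(d.get(oid, 0.0) for d in lookup)
%%%                     heapq.heappush(topk, (score, oid))
%%%                     if len(topk) > k:
%%%                         heapq.heappop(topk)
%%%             if len(topk) == k and topk[0][0] >= threshold:
%%%                 break                       # TA stopping condition
%%%         return sorted(topk, reverse=True)
%%%
%%%     L1 = [("a", 0.9), ("b", 0.8), ("c", 0.1)]
%%%     L2 = [("b", 0.9), ("a", 0.7), ("c", 0.2)]
%%%     print(threshold_algorithm([L1, L2], k=1))  # [(1.7, 'b')]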

@Article{Murugesan:2010:EPP,
  author =       "Mummoorthy Murugesan and Wei Jiang and Chris Clifton
                 and Luo Si and Jaideep Vaidya",
  title =        "Efficient privacy-preserving similar document
                 detection",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "4",
  pages =        "457--475",
  month =        aug,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0175-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:06:22 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Similar document detection plays important roles in
                 many applications, such as file management, copyright
                 protection, plagiarism prevention, and duplicate
                 submission detection. The state-of-the-art protocols
                 assume that the contents of files stored on a server
                 (or multiple servers) are directly accessible. However,
                 this makes such protocols unsuitable for any
                 environment where the documents themselves are
                 sensitive and cannot be openly read. Essentially, this
                 assumption limits more practical applications, e.g.,
                 detecting plagiarized documents between two
                 conferences, where submissions are confidential. We
                 propose novel protocols to detect similar documents
                 between two entities where documents cannot be openly
                 shared with each other. The similarity measure used can
                 be a simple cosine similarity on entire documents or on
                 document fragments, enabling detection of partial
                 copying. We conduct extensive experiments to show the
                 practical value of the proposed protocols. While the
                 proposed base protocols are much more efficient than
                 the general secure multiparty computation based
                 solutions, they are still slow for large document sets.
                 We then investigate a clustering-based approach that
                 significantly reduces the running time and achieves
                 over 90\% accuracy in our experiments. This makes
                 secure similar document detection both practical and
                 feasible.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Information retrieval; Privacy",
}
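
%%% The similarity measure named in the abstract, cosine similarity
%%% over term-frequency vectors, is shown below without any of the
%%% privacy machinery (which is the paper's actual contribution):
%%%
%%%     import math
%%%     from collections import Counter
%%%
%%%     def cosine(doc_a, doc_b):
%%%         a, b = Counter(doc_a.split()), Counter(doc_b.split())
%%%         dot = sum(a[t] * b[t] for t in a.keys() & b.keys())
%%%         na = math.sqrt(sum(v * v for v in a.values()))
%%%         nb = math.sqrt(sum(v * v for v in b.values()))
%%%         return dot / (na * nb) if na and nb else 0.0
%%%
%%%     print(cosine("secure similar document detection",
%%%                  "similar document detection is practical"))
%%%     # ~0.67: three shared terms across 4- and 5-term documents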

@Article{Soliman:2010:SRQ,
  author =       "Mohamed A. Soliman and Ihab F. Ilyas and Shalev
                 Ben-David",
  title =        "Supporting ranking queries on uncertain and incomplete
                 data",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "4",
  pages =        "477--501",
  month =        aug,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0176-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:06:22 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Large databases with uncertain information are
                 becoming more common in many applications including
                 data integration, location tracking, and Web search. In
                 these applications, ranking records with uncertain
                 attributes introduces new problems that are
                 fundamentally different from conventional ranking.
                 Specifically, uncertainty in records' scores induces a
                 partial order over records, as opposed to the total
                 order that is assumed in the conventional ranking
                 settings. In this paper, we present a new probabilistic
                 model, based on partial orders, to encapsulate the
                 space of possible rankings originating from score
                 uncertainty. Under this model, we formulate several
                 ranking query types with different semantics. We
                 describe and analyze a set of efficient query
                 evaluation algorithms. We show that our techniques can
                 be used to solve the problem of rank aggregation in
                 partial orders under two widely adopted distance
                 metrics. In addition, we design sampling techniques
                 based on Markov chains to compute approximate query
                 answers. Our experimental evaluation uses both real and
                 synthetic data. The experimental study demonstrates the
                 efficiency and effectiveness of our techniques under
                 various configurations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Kendall tau; Partial orders; Probabilistic data; Rank
                 aggregation; Ranking; Top-k; Uncertain data",
}
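
%%% A minimal Python sketch (not from the paper) of the key idea:
%%% uncertain scores, modeled here as toy intervals, induce only a
%%% partial order; one record dominates another only when its whole
%%% score interval lies above the other's.
%%%
%%%   records = {"r1": (0.8, 0.9), "r2": (0.5, 0.85), "r3": (0.1, 0.3)}
%%%
%%%   def dominates(a, b):   # a surely scores higher than b
%%%       return records[a][0] > records[b][1]
%%%
%%%   pairs = [(a, b) for a in records for b in records
%%%            if a != b and dominates(a, b)]
%%%   print(pairs)   # r1 and r2 both dominate r3, yet are incomparable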

@Article{Lee:2010:SCE,
  author =       "Ki-Hoon Lee and Kyu-Young Whang and Wook-Shin Han and
                 Min-Soo Kim",
  title =        "Structural consistency: enabling {XML} keyword search
                 to eliminate spurious results consistently",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "4",
  pages =        "503--529",
  month =        aug,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-009-0177-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:06:22 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "XML keyword search is a user-friendly way to query XML
                 data using only keywords. In XML keyword search, to
                 achieve high precision without sacrificing recall, it
                 is important to remove spurious results not intended by
                 the user. Efforts to eliminate spurious results have
                 enjoyed some success using the concepts of LCA or its
                  variants, SLCA and MLCA. However, existing methods can
                  still return many spurious results. The fundamental
                 cause for the occurrence of spurious results is that
                 the existing methods try to eliminate spurious results
                 locally without global examination of all the query
                 results and, accordingly, some spurious results are not
                 consistently eliminated. In this paper, we propose a
                 novel keyword search method that removes spurious
                 results consistently by exploiting the new concept of
                 structural consistency. We define structural
                 consistency as a property that is preserved if there is
                 no query result having an ancestor-descendant
                 relationship at the schema level with any other query
                 results. A naive solution to obtain structural
                 consistency would be to compute all the LCAs (or
                 variants) and then to remove spurious results according
                 to structural consistency. Obviously, this approach
                 would always be slower than existing LCA-based ones. To
                 speed up structural consistency checking, we must be
                 able to examine the query results at the schema level
                 without generating all the LCAs. However, this is a
                 challenging problem since the schema-level query
                 results do not homomorphically map to the
                 instance-level query results, causing serious false
                 dismissal. We present a comprehensive and practical
                 solution to this problem and formally prove that this
                 solution preserves structural consistency at the schema
                 level without incurring false dismissal. We also
                 propose a relevance-feedback-based solution for the
                 problem where our method has low recall, which occurs
                 when it is not the user's intention to find more
                 specific results. This solution has been prototyped in
                 a full-fledged object-relational DBMS Odysseus
                 developed at KAIST. Experimental results using real and
                 synthetic data sets show that, compared with the
                 state-of-the-art methods, our solution significantly
                 (1) improves precision while providing comparable
                 recall for most queries and (2) enhances the query
                 performance by removing spurious results early.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Keyword search; Odysseus DBMS; Spurious results;
                 Structural consistency; Structural summary; XML",
}
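
%%% A minimal Python sketch (not the paper's algorithm) of the
%%% structural-consistency filter described above: among schema-level
%%% results, modeled as toy root-to-node tag paths, drop any result
%%% that is an ancestor of another result.
%%%
%%%   results = [("bib", "book", "title"),
%%%              ("bib", "book"),             # ancestor => spurious
%%%              ("bib", "article", "title")]
%%%
%%%   def is_ancestor(a, b):
%%%       return len(a) < len(b) and b[:len(a)] == a
%%%
%%%   consistent = [r for r in results
%%%                 if not any(is_ancestor(r, s) for s in results)]
%%%   print(consistent)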

@Article{Lucchese:2010:RPT,
  author =       "Claudio Lucchese and Michail Vlachos and Deepak Rajan
                 and Philip S. Yu",
  title =        "Rights protection of trajectory datasets with
                 nearest-neighbor preservation",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "4",
  pages =        "531--556",
  month =        aug,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0178-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:06:22 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Companies frequently outsource datasets to mining
                 firms, and academic institutions create repositories or
                 share datasets in the interest of promoting research
                 collaboration. Still, many practitioners have
                 reservations about sharing or outsourcing datasets,
                 primarily because of fear of losing the principal
                 rights over the dataset. This work presents a way of
                 convincingly claiming ownership rights over a
                 trajectory dataset, without, at the same time,
                 destroying the salient dataset characteristics, which
                 are important for accurate search operations and
                 data-mining tasks. The digital watermarking methodology
                  that we present imperceptibly distorts a collection of
                 sequences, effectively embedding a secret key, while
                 retaining as well as possible the neighborhood of each
                 object, which is vital for operations such as
                 similarity search, classification, or clustering. A key
                 contribution in this methodology is a technique for
                 discovering the maximum distortion that still maintains
                 such desirable properties. We demonstrate both
                 analytically and empirically that the proposed dataset
                  marking techniques can withstand a number of attacks
                  (such as translation, rotation, noise addition, etc.)
                 therefore can provide a robust framework for
                 facilitating the secure dissemination of trajectory
                 datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Nearest neighbors; Rights protection; Time-series;
                 Trajectories; Watermarking",
}
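
%%% A minimal Python sketch (not the paper's embedding scheme): a
%%% keyed, bounded perturbation of toy trajectories, followed by a
%%% check that every object's nearest neighbor survives the marking.
%%%
%%%   import random
%%%
%%%   random.seed(42)    # stands in for the secret key
%%%   data = [[(0.0, 0.0), (1.0, 1.0)], [(0.1, 0.0), (1.1, 1.0)],
%%%           [(5.0, 5.0), (6.0, 6.0)]]
%%%
%%%   def dist(t, u):
%%%       return sum((a - c) ** 2 + (b - d) ** 2
%%%                  for (a, b), (c, d) in zip(t, u))
%%%
%%%   def nn(i, ds):
%%%       return min((j for j in range(len(ds)) if j != i),
%%%                  key=lambda j: dist(ds[i], ds[j]))
%%%
%%%   eps = 0.01         # distortion kept below the NN-breaking level
%%%   marked = [[(x + random.uniform(-eps, eps),
%%%               y + random.uniform(-eps, eps)) for x, y in t]
%%%             for t in data]
%%%   print(all(nn(i, data) == nn(i, marked) for i in range(len(data))))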

@Article{Zhang:2010:SMA,
  author =       "Rui Zhang and Nick Koudas and Beng Chin Ooi and Divesh
                 Srivastava and Pu Zhou",
  title =        "Streaming multiple aggregations using phantoms",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "4",
  pages =        "557--583",
  month =        aug,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0180-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:06:22 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Data streams characterize the high-speed, large-volume
                  input of a new class of applications such as
                 network monitoring, web content analysis and sensor
                 networks. Among these applications, network monitoring
                 may be the most compelling one--the backbone of a large
                 internet service provider can generate 1 petabyte of
                 data per day. For many network monitoring tasks such as
                 traffic analysis and statistics collection, aggregation
                 is a primitive operation. Various analytical and
                 statistical needs naturally lead to related aggregate
                 queries. In this article, we address the problem of
                 efficiently computing multiple aggregations over
                 high-speed data streams based on the two-level query
                 processing architecture of GS, a real data stream
                  management system deployed at AT\&T. We discern that
                 additionally computing and maintaining fine-granularity
                 aggregations (called phantoms) has the benefit of
                 supporting shared computation. Based on a thorough
                 analysis, we propose algorithms to identify the best
                 set of phantoms to maintain and determine allocation of
                 resources (particularly, space) to compute the
                 aggregations. Experiments show that our algorithm
                 achieves near-optimal computation costs, which
                 outperforms the best adapted algorithm by more than an
                 order of magnitude.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Aggregation; Data stream; GS; Multiple-query
                 optimization; Phantom",
}
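
%%% A minimal Python sketch (not the GS implementation) of the phantom
%%% idea above: maintain one fine-granularity aggregate per
%%% (source, destination) pair in a single pass, then derive both
%%% coarser group-bys from it instead of re-reading the stream.
%%%
%%%   from collections import Counter
%%%
%%%   stream = [("a", "x", 3), ("a", "y", 1), ("b", "x", 2),
%%%             ("a", "x", 4)]                # toy (src, dst, bytes)
%%%
%%%   phantom = Counter()
%%%   for src, dst, nbytes in stream:        # one pass over the stream
%%%       phantom[(src, dst)] += nbytes
%%%
%%%   by_src, by_dst = Counter(), Counter()
%%%   for (src, dst), total in phantom.items():   # cheap roll-ups
%%%       by_src[src] += total
%%%       by_dst[dst] += total
%%%   print(by_src, by_dst)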

@Article{Jeung:2010:PPP,
  author =       "Hoyoung Jeung and Man Lung Yiu and Xiaofang Zhou and
                 Christian S. Jensen",
  title =        "Path prediction and predictive range querying in road
                 network databases",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "4",
  pages =        "585--602",
  month =        aug,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0181-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Aug 18 12:06:22 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In automotive applications, movement-path prediction
                 enables the delivery of predictive and relevant
                 services to drivers, e.g., reporting traffic conditions
                 and gas stations along the route ahead. Path prediction
                 also enables better results of predictive range queries
                 and reduces the location update frequency in vehicle
                 tracking while preserving accuracy. Existing
                 moving-object location prediction techniques in
                 spatial-network settings largely target short-term
                 prediction that does not extend beyond the next road
                 junction. To go beyond short-term prediction, we
                 formulate a network mobility model that offers a
                 concise representation of mobility statistics extracted
                 from massive collections of historical object
                 trajectories. The model aims to capture the turning
                 patterns at junctions and the travel speeds on road
                 segments at the level of individual objects. Based on
                 the mobility model, we present a maximum likelihood and
                 a greedy algorithm for predicting the travel path of an
                  object (for a time duration $h$ into the future). We
                  also
                 present a novel and efficient server-side indexing
                 scheme that supports predictive range queries on the
                 mobility statistics of the objects. Empirical studies
                 with real data suggest that our proposals are effective
                 and efficient.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  keywords =     "Mobility statistics; Path prediction; Predictive range
                 query; Road network database",
}
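
%%% A minimal Python sketch (not the paper's maximum-likelihood
%%% algorithm) of its greedy variant: at each junction, follow the
%%% most probable turn from hypothetical mobility statistics.
%%%
%%%   turn_prob = {"A": {"B": 0.7, "C": 0.3},     # toy statistics
%%%                "B": {"D": 0.9, "A": 0.1},
%%%                "D": {}}
%%%
%%%   def predict(start, hops):
%%%       path = [start]
%%%       for _ in range(hops):
%%%           nxt = turn_prob.get(path[-1], {})
%%%           if not nxt:
%%%               break
%%%           path.append(max(nxt, key=nxt.get))
%%%       return path
%%%
%%%   print(predict("A", 3))   # ['A', 'B', 'D']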

@Article{Ali:2010:MAA,
  author =       "Mohammed Eunus Ali and Egemen Tanin and Rui Zhang and
                 Lars Kulik",
  title =        "A motion-aware approach for efficient evaluation of
                 continuous queries on {$3$D} object databases",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "5",
  pages =        "603--632",
  month =        oct,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1145/1873117.1873119",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Oct 29 17:56:55 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Askitis:2010:ESC,
  author =       "Nikolas Askitis and Ranjan Sinha",
  title =        "Engineering scalable, cache and space efficient tries
                 for strings",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "5",
  pages =        "633--660",
  month =        oct,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1145/1873117.1873121",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Oct 29 17:56:55 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wu:2010:EEG,
  author =       "Xiaoying Wu and Dimitri Theodoratos and Calisto
                 Zuzarte",
  title =        "Efficient evaluation of generalized tree-pattern
                 queries on {XML} streams",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "5",
  pages =        "661--686",
  month =        oct,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1145/1873117.1873120",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Oct 29 17:56:55 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Guting:2010:ENN,
  author =       "Ralf Hartmut G{\"u}ting and Thomas Behr and Jianqiu
                 Xu",
  title =        "Efficient $k$-nearest neighbor search on moving object
                 trajectories",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "5",
  pages =        "687--714",
  month =        oct,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1145/1873117.1873123",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Oct 29 17:56:55 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2010:TQT,
  author =       "Feifei Li and Ke Yi and Wangchao Le",
  title =        "Top-$k$ queries on temporal data",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "5",
  pages =        "715--733",
  month =        oct,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1145/1873117.1873122",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Oct 29 17:56:55 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Duda:2010:PBI,
  author =       "Cristian Duda and Donald Kossmann and Chong Zhou",
  title =        "Predicate-based indexing for desktop search",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "5",
  pages =        "735--758",
  month =        oct,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1145/1873117.1873124",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Oct 29 17:56:55 MDT 2010",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bohm:2010:F,
  author =       "Klemens B{\"o}hm and Laks V. Lakshmanan",
  title =        "Foreword",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "6",
  pages =        "759--760",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0201-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:41 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Carmel:2010:SBW,
  author =       "David Carmel and Haggai Roitman and Elad Yom-Tov",
  title =        "Social bookmark weighting for search and
                 recommendation",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "6",
  pages =        "761--775",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0211-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:41 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Social bookmarking enables knowledge sharing and
                  efficient discovery on the web, where users can
                  collaborate by tagging documents of interest. Much
                  attention has lately been given to utilizing social
                  bookmarking data to enhance traditional IR tasks. Yet
                  much less attention has been given to the problem of
                  estimating the effectiveness of an individual bookmark
                  for specific tasks. In this
                 work, we propose a novel framework for social bookmark
                 weighting which allows us to estimate the effectiveness
                 of each of the bookmarks individually for several IR
                 tasks. We show that by weighting bookmarks according to
                 their estimated quality, we can significantly improve
                 social search effectiveness.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Squicciarini:2010:PPS,
  author =       "Anna C. Squicciarini and Mohamed Shehab and Joshua
                 Wede",
  title =        "Privacy policies for shared content in social network
                 sites",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "6",
  pages =        "777--796",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0193-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:41 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Social networking is one of the major technological
                 phenomena of the Web 2.0, with hundreds of millions of
                 subscribed users. Social networks enable a form of
                 self-expression for users and help them to socialize
                  and share content with other users. Although content
                  sharing is one of the prominent features of existing
                  social network sites, they provide no mechanisms for
                  collective management of privacy settings for shared
                  content. In
                 this paper, using game theory, we model the problem of
                 collective enforcement of privacy policies on shared
                 data. In particular, we propose a solution that offers
                 automated ways to share images based on an extended
                 notion of content ownership.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hay:2010:RSR,
  author =       "Michael Hay and Gerome Miklau and David Jensen and Don
                 Towsley and Chao Li",
  title =        "Resisting structural re-identification in anonymized
                 social networks",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "6",
  pages =        "797--823",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0210-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:41 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We identify privacy risks associated with releasing
                 network datasets and provide an algorithm that
                 mitigates those risks. A network dataset is a graph
                 representing entities connected by edges representing
                 relations such as friendship, communication or shared
                 activity. Maintaining privacy when publishing a network
                 dataset is uniquely challenging because an individual's
                 network context can be used to identify them even if
                 other identifying information is removed. In this
                 paper, we introduce a parameterized model of structural
                 knowledge available to the adversary and quantify the
                 success of attacks on individuals in anonymized
                 networks. We show that the risks of these attacks vary
                 based on network structure and size and provide
                 theoretical results that explain the anonymity risk in
                 random networks.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
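
%%% A minimal Python sketch (one instance of the structural knowledge
%%% the paper parameterizes): an adversary who knows only a target's
%%% degree narrows the candidate set to all nodes of that degree in
%%% the anonymized toy graph.
%%%
%%%   graph = {1: {2, 3}, 2: {1}, 3: {1, 4}, 4: {3}}
%%%
%%%   def candidates(known_degree):
%%%       return [v for v, nbrs in graph.items()
%%%               if len(nbrs) == known_degree]
%%%
%%%   print(candidates(2))   # nodes 1 and 3 stay indistinguishable
%%%   print(candidates(1))   # nodes 2 and 4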

@Article{Gruhl:2010:MSI,
  author =       "Daniel Gruhl and Meena Nagarajan and Jan Pieper and
                 Christine Robson and Amit Sheth",
  title =        "Multimodal social intelligence in a real-time
                 dashboard system",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "6",
  pages =        "825--848",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0207-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:41 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Social Networks provide one of the most rapidly
                 evolving data sets in existence today. Traditional
                 Business Intelligence applications struggle to take
                 advantage of such data sets in a timely manner. The BBC
                 SoundIndex, developed by the authors and others,
                 enabled real-time analytics of music popularity using
                 data from a variety of Social Networks. We present this
                 system as a grounding example of how to overcome the
                 challenges of working with this data from social
                 networks. We discuss a variety of technologies to
                 implement near real-time data analytics to transform
                 Social Intelligence into Business Intelligence and
                 evaluate their effectiveness in the music domain.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Benz:2010:SBP,
  author =       "Dominik Benz and Andreas Hotho and Robert J{\"a}schke
                 and Beate Krause and Folke Mitzlaff and Christoph
                 Schmitz and Gerd Stumme",
  title =        "The social bookmark and publication management system
                 bibsonomy",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "6",
  pages =        "849--875",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0208-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:41 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Social resource sharing systems are central elements
                 of the Web 2.0 and use the same kind of lightweight
                 knowledge representation, called folksonomy. Their
                 large user communities and ever-growing networks of
                 user-generated content have made them an attractive
                 object of investigation for researchers from different
                 disciplines like Social Network Analysis, Data Mining,
                 Information Retrieval or Knowledge Discovery. In this
                 paper, we summarize and extend our work on different
                 aspects of this branch of Web 2.0 research,
                 demonstrated and evaluated within our own social
                 bookmark and publication sharing system BibSonomy,
                 which is currently among the three most popular systems
                 of its kind. We structure this presentation along the
                 different interaction phases of a user with our system,
                 coupling the relevant research questions of each phase
                 with the corresponding implementation issues.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Roy:2010:SEG,
  author =       "Senjuti Basu Roy and Sihem Amer-Yahia and Ashish
                 Chawla and Gautam Das and Cong Yu",
  title =        "Space efficiency in group recommendation",
  journal =      j-VLDB-J,
  volume =       "19",
  number =       "6",
  pages =        "877--900",
  month =        dec,
  year =         "2010",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0209-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:41 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Imagine a system that gives you satisfying
                 recommendations when you want to rent a movie with
                 friends or find a restaurant to celebrate a colleague's
                 farewell: at the core of such a system is what we call
                  group recommendation. While computing individual
                  recommendations has received much attention (e.g., the
                  Netflix prize), group recommendation has been confined
                 to studying users' satisfaction with different
                  aggregation strategies. In this paper (some results
                  were published in an earlier conference paper,
                  Amer-Yahia et al., VLDB 2009; see Sect. ``Paper
                  contributions and outline'' for details), we describe
                  the challenges
                 and desiderata of group recommendation and formalize
                 different group consensus semantics that account for
                 both an item's predicted ratings to the group members
                 and the disagreements among them.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
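
%%% A minimal Python sketch (one possible consensus semantics, not
%%% necessarily the paper's) combining predicted ratings with the
%%% group's disagreement: the average rating penalized by its
%%% variance, on toy data.
%%%
%%%   from statistics import mean, pvariance
%%%
%%%   ratings = {"movie1": [5, 5, 1], "movie2": [4, 4, 4]}
%%%
%%%   def consensus(item, w=1.0):   # w trades mean against discord
%%%       r = ratings[item]
%%%       return mean(r) - w * pvariance(r)
%%%
%%%   print(max(ratings, key=consensus))   # movie2: no disagreement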

@Article{Li:2011:PBK,
  author =       "Guoliang Li and Jianhua Feng and Xiaofang Zhou and
                 Jianyong Wang",
  title =        "Providing built-in keyword search capabilities in
                 {RDBMS}",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "1",
  pages =        "1--19",
  month =        feb,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0188-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:36 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A common approach to performing keyword search over
                 relational databases is to find the minimum Steiner
                 trees in database graphs transformed from relational
                 data. These methods, however, are rather expensive as
                 the minimum Steiner tree problem is known to be
                 NP-hard. Further, these methods are independent of the
                 underlying relational database management system
                 (RDBMS), thus cannot benefit from the capabilities of
                 the RDBMS. As an alternative, in this paper we propose
                 a new concept called Compact Steiner Tree (CSTree),
                 which can be used to approximate the Steiner tree
                 problem for answering top-$k$ keyword queries
                 efficiently. We propose a novel structure-aware index,
                 together with an effective ranking mechanism for fast,
                 progressive and accurate retrieval of top-$k$ highest
                 ranked CSTrees.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cai:2011:SKD,
  author =       "Deng Cai and Xiaofei He and Jiawei Han",
  title =        "Speed up kernel discriminant analysis",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "1",
  pages =        "21--33",
  month =        feb,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0189-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:36 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Linear discriminant analysis (LDA) has been a popular
                 method for dimensionality reduction, which preserves
                 class separability. The projection vectors are commonly
                 obtained by maximizing the between-class covariance and
                 simultaneously minimizing the within-class covariance.
                 LDA can be performed either in the original input space
                 or in the reproducing kernel Hilbert space (RKHS) into
                 which data points are mapped, which leads to kernel
                 discriminant analysis (KDA). When the data are highly
                  nonlinearly distributed, KDA can achieve better
                  performance than LDA. However, computing the
                  projective functions in KDA involves
                  eigen-decomposition of the kernel matrix, which is
                  very expensive when the number of training samples is
                  large.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
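
%%% A minimal Python sketch (using numpy) of the bottleneck the paper
%%% attacks: building an n-by-n kernel matrix and eigen-decomposing
%%% it, a step cubic in the number of training samples.
%%%
%%%   import numpy as np
%%%
%%%   rng = np.random.default_rng(0)
%%%   X = rng.standard_normal((200, 5))    # 200 samples, 5 features
%%%
%%%   sq = (X ** 2).sum(axis=1)
%%%   K = np.exp(-(sq[:, None] + sq[None, :] - 2 * X.dot(X.T)))  # RBF
%%%   eigvals, eigvecs = np.linalg.eigh(K)   # O(n^3): slow for big n
%%%   print(K.shape, eigvals[-3:])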

@Article{Qin:2011:SKS,
  author =       "Lu Qin and Jeffrey Xu Yu and Lijun Chang",
  title =        "Scalable keyword search on large data streams",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "1",
  pages =        "35--57",
  month =        feb,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0190-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:36 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "It is widely recognized that the integration of
                 information retrieval (IR) and database (DB) techniques
                  provides users with a broad range of high-quality
                  services. Along this direction, IR-styled $m$-keyword
                  query processing over a relational database in an
                  RDBMS framework has been well studied. It finds all
                  hidden interconnected tuple structures, for example,
                  connected trees that contain keywords and are linked
                  by
                 sequences of primary/foreign key relationships among
                 tuples. A new challenging issue is how to monitor
                 events that are implicitly interrelated over an
                 open-ended relational data stream for a user-given
                 $m$-keyword query. Such a relational data stream is a
                 sequence of tuple insertion/deletion operations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cao:2011:SSA,
  author =       "Jianneng Cao and Panagiotis Karras and Panos Kalnis
                 and Kian-Lee Tan",
  title =        "{SABRE}: a {Sensitive Attribute Bucketization and
                 REdistribution} framework for $t$-closeness",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "1",
  pages =        "59--81",
  month =        feb,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0191-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:36 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Today, the publication of microdata poses a privacy
                 threat: anonymous personal records can be re-identified
                  using third-party data sources. Past research has
                  tried to
                 develop a concept of privacy guarantee that an
                 anonymized data set should satisfy before publication,
                 culminating in the notion of $t$-closeness. To satisfy
                 $t$-closeness, the records in a data set need to be
                 grouped into Equivalence Classes (ECs), such that each
                 EC contains records of indistinguishable
                 quasi-identifier values, and its local distribution of
                 sensitive attribute (SA) values conforms to the global
                 table distribution of SA values. However, despite this
                 progress, previous research has not offered an
                 anonymization algorithm tailored for $t$-closeness.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
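
%%% A minimal Python sketch of the t-closeness condition above, using
%%% total-variation distance on toy data for brevity (the paper's
%%% formulation uses the Earth Mover's Distance).
%%%
%%%   from collections import Counter
%%%
%%%   table = ["flu", "flu", "cancer", "flu", "cancer", "hiv"]
%%%   ec = ["flu", "cancer", "hiv"]    # one equivalence class
%%%
%%%   def distribution(values):
%%%       c = Counter(values)
%%%       return {k: c[k] / len(values) for k in set(table)}
%%%
%%%   g, l = distribution(table), distribution(ec)
%%%   tv = 0.5 * sum(abs(g[k] - l[k]) for k in g)
%%%   print(tv, tv <= 0.3)   # EC passes a 0.3-closeness (TV) check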

@Article{Terrovitis:2011:LGR,
  author =       "Manolis Terrovitis and Nikos Mamoulis and Panos
                 Kalnis",
  title =        "Local and global recoding methods for anonymizing
                 set-valued data",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "1",
  pages =        "83--106",
  month =        feb,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0192-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:36 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we study the problem of protecting
                 privacy in the publication of set-valued data. Consider
                 a collection of supermarket transactions that contains
                 detailed information about items bought together by
                 individuals. Even after removing all personal
                 characteristics of the buyer, which can serve as links
                 to his identity, the publication of such data is still
                 subject to privacy attacks from adversaries who have
                 partial knowledge about the set. Unlike most previous
                 works, we do not distinguish data as sensitive and
                 non-sensitive, but we consider them both as potential
                 quasi-identifiers and potential sensitive data,
                 depending on the knowledge of the adversary.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lian:2011:PIR,
  author =       "Xiang Lian and Lei Chen",
  title =        "Probabilistic inverse ranking queries in uncertain
                 databases",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "1",
  pages =        "107--127",
  month =        feb,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0195-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:36 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Query processing in the uncertain database has become
                 increasingly important due to the wide existence of
                 uncertain data in many real applications. Different
                 from handling precise data, the uncertain query
                 processing needs to consider the data uncertainty and
                 answer queries with confidence guarantees. In this
                 paper, we formulate and tackle an important query,
                 namely probabilistic inverse ranking (PIR) query, which
                 retrieves possible ranks of a given query object in an
                 uncertain database with confidence above a probability
                 threshold. We present effective pruning methods to
                 reduce the PIR search space, which can be seamlessly
                 integrated into an efficient query procedure. Moreover,
                  we tackle the problem of PIR query processing in
                  high-dimensional spaces by reducing high-dimensional
                  uncertain data to a lower-dimensional space.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
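
%%% A minimal Python sketch (not the paper's pruning method) of the
%%% quantity a PIR query needs: the probability of each possible rank
%%% of the query object, from independent toy probabilities that each
%%% uncertain object outscores it (a Poisson-binomial distribution,
%%% computed by dynamic programming).
%%%
%%%   beats_q = [0.9, 0.5, 0.2]   # P(object i outscores the query)
%%%
%%%   rank_prob = [1.0]           # rank_prob[j] = P(j objects beat q)
%%%   for p in beats_q:
%%%       nxt = [0.0] * (len(rank_prob) + 1)
%%%       for j, pr in enumerate(rank_prob):
%%%           nxt[j] += pr * (1 - p)
%%%           nxt[j + 1] += pr * p
%%%       rank_prob = nxt
%%%
%%%   # rank = 1 + number of objects above q; keep ranks above 0.2
%%%   print([(j + 1, p) for j, p in enumerate(rank_prob) if p >= 0.2])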

@Article{Hua:2011:RQU,
  author =       "Ming Hua and Jian Pei and Xuemin Lin",
  title =        "Ranking queries on uncertain data",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "1",
  pages =        "129--153",
  month =        feb,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0196-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Feb 7 10:43:36 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Uncertain data is inherent in a few important
                 applications. It is far from trivial to extend ranking
                 queries (also known as top-$k$ queries), a popular type
                 of queries on certain data, to uncertain data. In this
                 paper, we cast ranking queries on uncertain data using
                 three parameters: rank threshold $k$, probability
                 threshold $p$, and answer set size threshold $l$.
                 Systematically, we identify four types of ranking
                 queries on uncertain data. First, a probability
                 threshold top-$k$ query computes the uncertain records
                  taking a probability of at least $p$ to be in the
                  top-$k$ list. Second, a top-$(k, l)$ query returns the
                  top-$l$
                 uncertain records whose probabilities of being ranked
                 among top-$k$ are the largest.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
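
%%% A minimal Python sketch (Monte Carlo, not the paper's exact
%%% algorithms) of the first query type above: estimate each record's
%%% probability of entering the top-$k$ list when toy scores are drawn
%%% from uncertain ranges, then apply the probability threshold $p$.
%%%
%%%   import random
%%%
%%%   random.seed(1)
%%%   score_range = {"a": (6, 9), "b": (5, 8), "c": (1, 3)}
%%%   k, trials = 2, 10000
%%%   hits = {r: 0 for r in score_range}
%%%
%%%   for _ in range(trials):
%%%       draw = {r: random.uniform(*rng) for r, rng in
%%%               score_range.items()}
%%%       for r in sorted(draw, key=draw.get, reverse=True)[:k]:
%%%           hits[r] += 1
%%%
%%%   p = 0.5
%%%   print([r for r in hits if hits[r] / trials >= p])  # ['a', 'b']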

@Article{Abiteboul:2011:SIB,
  author =       "Serge Abiteboul and Volker Markl and Tova Milo and
                 Jignesh Patel",
  title =        "Special issue: best papers of {VLDB} 2009",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "2",
  pages =        "155--156",
  month =        apr,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0222-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 13 17:51:05 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mindolin:2011:PEP,
  author =       "Denis Mindolin and Jan Chomicki",
  title =        "Preference elicitation in prioritized skyline
                 queries",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "2",
  pages =        "157--182",
  month =        apr,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0227-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 13 17:51:05 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Preference queries incorporate the notion of binary
                 preference relation into relational database querying.
                 Instead of returning all the answers, such queries
                 return only the best answers, according to a given
                 preference relation. Preference queries are a fast
                 growing area of database research. Skyline queries
                 constitute one of the most thoroughly studied classes
                 of preference queries. A well-known limitation of
                 skyline queries is that skyline preference relations
                 assign the same importance to all attributes. In this
                 work, we study p-skyline queries that generalize
                 skyline queries by allowing varying attribute
                 importance in preference relations. We perform an
                 in-depth study of the properties of p-skyline
                 preference relations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
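
%%% A minimal Python sketch of the plain skyline the paper
%%% generalizes: with all attributes equally important, a point
%%% survives unless some other point is no worse everywhere and
%%% strictly better somewhere (p-skylines then let attribute
%%% importance vary).
%%%
%%%   points = [(1, 5), (2, 2), (4, 1), (3, 3)]   # lower is better
%%%
%%%   def dominates(a, b):
%%%       return all(x <= y for x, y in zip(a, b)) and a != b
%%%
%%%   skyline = [p for p in points
%%%              if not any(dominates(q, p) for q in points)]
%%%   print(skyline)   # (3, 3) is dominated by (2, 2)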

@Article{Denev:2011:SFD,
  author =       "Dimitar Denev and Arturas Mazeika and Marc Spaniol and
                 Gerhard Weikum",
  title =        "The {SHARC} framework for data quality in {Web}
                 archiving",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "2",
  pages =        "183--207",
  month =        apr,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0219-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 13 17:51:05 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Web archives preserve the history of born-digital
                 content and offer great potential for sociologists,
                 business analysts, and legal experts on intellectual
                 property and compliance issues. Data quality is crucial
                 for these purposes. Ideally, crawlers should gather
                 coherent captures of entire Web sites, but the
                 politeness etiquette and completeness requirement
                 mandate very slow, long-duration crawling while Web
                 sites undergo changes. This paper presents the SHARC
                 framework for assessing the data quality in Web
                 archives and for tuning capturing strategies toward
                 better quality with given resources. We define data
                 quality measures, characterize their properties, and
                 develop a suite of quality-conscious scheduling
                 strategies for archive crawling.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Elmeleegy:2011:HRT,
  author =       "Hazem Elmeleegy and Jayant Madhavan and Alon Halevy",
  title =        "Harvesting relational tables from lists on the {Web}",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "2",
  pages =        "209--226",
  month =        apr,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0223-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 13 17:51:05 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A large number of web pages contain data structured in
                 the form of ``lists''. Many such lists can be further
                 split into multi-column tables, which can then be used
                 in more semantically meaningful tasks. However,
                 harvesting relational tables from such lists can be a
                 challenging task. The lists are manually generated and
                 hence need not have well-defined templates--they have
                 inconsistent delimiters (if any) and often have missing
                 information. We propose a novel technique for
                 extracting tables from lists. The technique is domain
                 independent and operates in a fully unsupervised
                 manner. We first use multiple sources of information to
                 split individual lines into multiple fields and then,
                 compare the splits across multiple lines to identify
                 and fix incorrect splits and bad alignments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Candea:2011:PPH,
  author =       "George Candea and Neoklis Polyzotis and Radek
                 Vingralek",
  title =        "Predictable performance and high query concurrency for
                 data analytics",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "2",
  pages =        "227--248",
  month =        apr,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0221-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 13 17:51:05 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Conventional data warehouses employ the
                 query-at-a-time model, which maps each query to a
                 distinct physical plan. When several queries execute
                 concurrently, this model introduces contention and
                 thrashing, because the physical plans--unaware of each
                 other--compete for access to the underlying I/O and
                 computation resources. As a result, while modern
                 systems can efficiently optimize and evaluate a single
                 complex data analysis query, their performance suffers
                 significantly and can be highly erratic when multiple
                 complex queries run at the same time. We present in
                 this paper Cjoin, a new design that substantially
                 improves throughput in large-scale data analytics
                 systems processing many concurrent join queries.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2011:UAR,
  author =       "Jian Li and Barna Saha and Amol Deshpande",
  title =        "A unified approach to ranking in probabilistic
                 databases",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "2",
  pages =        "249--275",
  month =        apr,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0220-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 13 17:51:05 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Ranking is a fundamental operation in data analysis
                 and decision support and plays an even more crucial
                 role if the dataset being explored exhibits
                 uncertainty. This has led to much work in understanding
                 how to rank the tuples in a probabilistic dataset in
                 recent years. In this article, we present a unified
                  approach to ranking and top-$k$ query processing in
                 probabilistic databases by viewing it as a
                 multi-criterion optimization problem and by deriving a
                 set of features that capture the key properties of a
                 probabilistic dataset that dictate the ranked result.
                 We contend that a single, specific ranking function may
                 not suffice for probabilistic databases, and we instead
                 propose two parameterized ranking functions, called PRF
                 \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gottlob:2011:NOS,
  author =       "Georg Gottlob and Reinhard Pichler and Vadim
                 Savenkov",
  title =        "Normalization and optimization of schema mappings",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "2",
  pages =        "277--302",
  month =        apr,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0226-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Apr 13 17:51:05 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Schema mappings are high-level specifications that
                 describe the relationship between database schemas.
                 They are an important tool in several areas of database
                 research, notably in data integration and data
                  exchange. However, a concrete theory of schema mapping
                  optimization, including the formulation of optimality
                  criteria and the construction of algorithms for
                  computing optimal schema mappings, has been completely
                  lacking
                 to date. The goal of this work is to fill this gap. We
                 start by presenting a system of rewrite rules to
                 minimize sets of source-to-target tuple-generating
                 dependencies. Moreover, we show that the result of this
                 minimization is unique up to variable renaming. Hence,
                 our optimization also yields a schema mapping
                 normalization.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cho:2011:LRM,
  author =       "Chung-Wen Cho and Yi-Hung Wu and Show-Jane Yen and
                 Ying Zheng and Arbee L. Chen",
  title =        "On-line rule matching for event prediction",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "3",
  pages =        "303--334",
  month =        jun,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0197-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 14 11:27:46 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The prediction of future events has great importance
                 in many applications. The prediction is based on
                  episode rules, each composed of events plus two time
                  constraints requiring all the events in the episode
                  rule and in its predicate, respectively, to occur
                  within a time interval. In an event stream, a
                 sequence of events which matches the predicate of the
                 rule satisfying the specified time constraint is called
                 an occurrence of the predicate. After finding the
                 occurrence, the consequent event which will occur in a
                 time interval can be predicted. However, the time
                 intervals computed from some occurrences for predicting
                 the event can be contained in the time intervals
                  computed from other occurrences and so become
                  redundant.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
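
%%% A minimal Python sketch of the redundancy notion above, following
%%% the abstract's wording: a predicted time interval contained in
%%% another prediction's interval is redundant and is filtered out of
%%% the toy set (the paper's on-line matching is more involved).
%%%
%%%   intervals = [(10, 20), (12, 18), (30, 40), (32, 40)]
%%%
%%%   def contained_in(a, b):
%%%       return b[0] <= a[0] and a[1] <= b[1] and a != b
%%%
%%%   kept = [i for i in intervals
%%%           if not any(contained_in(i, j) for j in intervals)]
%%%   print(kept)   # [(10, 20), (30, 40)]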

@Article{Liu:2011:MLD,
  author =       "Jun Liu and Lu Jiang and Zhaohui Wu and Qinghua Zheng
                 and Yanan Qian",
  title =        "Mining learning-dependency between knowledge units
                 from text",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "3",
  pages =        "335--345",
  month =        jun,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0198-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 14 11:27:46 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Identifying learning-dependency among the knowledge
                 units (KU) is a preliminary requirement of navigation
                  learning. Methods based on link mining lack the
                  ability to discover such dependencies among knowledge
                  units that are arranged linearly in the text. In this
                  paper, we propose a method of mining the
                  learning-dependencies among the KUs from text
                  documents. This
                 method is based on two features that we found and
                 studied from the KU and the learning-dependencies among
                 them. They are the distributional asymmetry of the
                 domain terms and the local nature of the
                 learning-dependency, respectively. Our method consists
                 of three stages, (1) Build document association
                 relationship by calculating the distributional
                 asymmetry of the domain terms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2011:LBM,
  author =       "Rui Wang and Betty Salzberg and David Lomet",
  title =        "Log-based middleware server recovery with transaction
                 support",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "3",
  pages =        "347--370",
  month =        jun,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0199-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 14 11:27:46 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Providing enterprises with reliable and available
                 Web-based application programs is a challenge.
                 Applications are traditionally spread over multiple
                 nodes, from user (client), to middle tier servers, to
                 back end transaction systems, e.g., databases. It has
                 proven very difficult to ensure that these applications
                 persist across system crashes so that ``exactly once''
                 execution is produced; this is always important and
                 sometimes essential, e.g., in the financial area. Our
                 system
                 provides a framework for exactly once execution of
                 multi-tier Web applications, built on a commercially
                 available Web infrastructure. Its capabilities include
                 low logging overhead, recovery isolation
                 (independence), and consistency between mid-tier and
                 transactional back end.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gao:2011:CVN,
  author =       "Yunjun Gao and Baihua Zheng and Gencai Chen and Qing
                 Li and Xiaofa Guo",
  title =        "Continuous visible nearest neighbor query processing
                 in spatial databases",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "3",
  pages =        "371--396",
  month =        jun,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0200-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 14 11:27:46 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we identify and solve a new type of
                 spatial queries, called continuous visible nearest
                 neighbor (CVNN) search. Given a data set P, an obstacle
                 set O, and a query line segment q in a two-dimensional
                 space, a CVNN query returns a set of $\langle p,
                 R \rangle$ tuples such that \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Luo:2011:QRI,
  author =       "Bo Luo and Dongwon Lee and Wang-Chien Lee and Peng
                 Liu",
  title =        "{QFilter}: rewriting insecure {XML} queries to secure
                 ones using non-deterministic finite automata",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "3",
  pages =        "397--415",
  month =        jun,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0202-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 14 11:27:46 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we ask whether XML access control can
                 be supported when underlying (XML or relational)
                 storage system does not provide adequate security
                 features and propose three alternative solutions
                 --primitive, pre-processing, and post-processing.
                 Toward that scenario, in particular, we advocate a
                 scalable and effective pre-processing approach, called
                 QFilter. QFilter is based on non-deterministic finite
                 automata (NFA) and rewrites user's queries such that
                 parts violating access control rules are pre-pruned.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
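
%%% As a rough illustration of the idea behind the QFilter entry above
%%% (not the paper's algorithm), the Python sketch below checks a query
%%% path against access-control path patterns, where '*' matches any
%%% one tag and '//' any run of tags; the backtracking search is, in
%%% effect, a tiny NFA simulation. The rules and paths are invented:

def accepts(rule, path):
    # rule: list of steps, each a tag, '*' (any one tag), or '//'
    # (any run of zero or more tags); path: list of tags.
    if not rule:
        return not path
    head, rest = rule[0], rule[1:]
    if head == '//':                     # skip zero or more tags
        return any(accepts(rest, path[i:]) for i in range(len(path) + 1))
    if path and head in ('*', path[0]):
        return accepts(rest, path[1:])
    return False

rules = [['site', '//', 'item', 'name'], ['site', 'open_auctions', '*']]
print(accepts(rules[0], ['site', 'regions', 'item', 'name']))        # True
print(any(accepts(r, ['site', 'people', 'person']) for r in rules))  # False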

@Article{Motahari-Nezhad:2011:ECP,
  author =       "Hamid Reza Motahari-Nezhad and Regis Saint-Paul and
                 Fabio Casati and Boualem Benatallah",
  title =        "Event correlation for process discovery from web
                 service interaction logs",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "3",
  pages =        "417--444",
  month =        jun,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0203-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 14 11:27:46 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Understanding, analyzing, and ultimately improving
                 business processes is a goal of enterprises today.
                 These tasks are challenging as business processes in
                 modern enterprises are implemented over several
                 applications and Web services, and the information
                 about process execution is scattered across several
                 data sources. Understanding modern business processes
                 entails identifying the correlation between events in
                 data sources in the context of business processes
                 (event correlation is the process of finding
                 relationships between events that belong to the same
                 process execution instance). In this paper, we
                 investigate the problem of event correlation for
                 business processes that are realized through the
                 interactions of a set of Web services.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
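
%%% Once a correlating attribute is known, the event-correlation step
%%% discussed in the entry above reduces to grouping log events by that
%%% attribute's value. A minimal Python sketch with an invented
%%% `order_id` attribute (the paper's actual contribution, discovering
%%% which attributes correlate, is not shown here):

from collections import defaultdict

def correlate(events, key="order_id"):
    # Group events into process-instance traces by the correlation key.
    instances = defaultdict(list)
    for e in events:
        instances[e[key]].append(e["op"])
    return dict(instances)

log = [
    {"order_id": 1, "op": "create"},
    {"order_id": 2, "op": "create"},
    {"order_id": 1, "op": "pay"},
    {"order_id": 1, "op": "ship"},
]
print(correlate(log))   # {1: ['create', 'pay', 'ship'], 2: ['create']}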

@Article{Chakrabarti:2011:IDQ,
  author =       "Soumen Chakrabarti and Amit Pathak and Manish Gupta",
  title =        "Index design and query processing for graph
                 conductance search",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "3",
  pages =        "445--470",
  month =        jun,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0204-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 14 11:27:46 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Graph conductance queries, also known as personalized
                 PageRank and related to random walks with restarts,
                 were originally proposed to assign a hyperlink-based
                 prestige score to Web pages. More general forms of such
                 queries are also very useful for ranking in
                 entity-relation (ER) graphs used to represent
                 relational, XML and hypertext data. Evaluation of
                 PageRank usually involves a global eigenvector
                 computation.
                 If the graph is even moderately large, interactive
                 response times may not be possible. Recently, the need
                 for interactive PageRank evaluation has increased. The
                 graph may be fully known only when the query is
                 submitted. Browsing actions of the user may change some
                 inputs to the PageRank computation dynamically.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
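
%%% For readers unfamiliar with the graph conductance queries in the
%%% entry above: personalized PageRank can be computed by power
%%% iteration, restarting at a preference distribution. A minimal
%%% Python sketch (not the paper's index-based evaluation; the example
%%% graph, `alpha`, and iteration count are arbitrary):

def personalized_pagerank(graph, prefs, alpha=0.15, iters=50):
    # graph: node -> list of out-neighbors (every neighbor must also be
    # a key of graph); prefs: restart (preference) distribution.
    nodes = list(graph)
    r = dict(prefs)
    for _ in range(iters):
        nxt = {v: alpha * prefs.get(v, 0.0) for v in nodes}
        for u in nodes:
            out = graph[u]
            if not out:
                continue               # toy: dangling mass is dropped
            share = (1.0 - alpha) * r.get(u, 0.0) / len(out)
            for v in out:
                nxt[v] += share
        r = nxt
    return r

g = {'a': ['b'], 'b': ['a', 'c'], 'c': ['a']}
print(personalized_pagerank(g, {'a': 1.0}))   # scores biased toward 'a'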

@Article{Chen:2011:PAD,
  author =       "Shaoping Chen and Yi-Cheng Tu and Yuni Xia",
  title =        "Performance analysis of a dual-tree algorithm for
                 computing spatial distance histograms",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "4",
  pages =        "471--494",
  month =        aug,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0205-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 16 19:01:00 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Many scientific and engineering fields produce large
                 volume of spatiotemporal data. The storage, retrieval,
                 and analysis of such data impose great challenges to
                 database systems design. Analysis of scientific
                 spatiotemporal data often involves computing functions
                 of all point-to-point interactions. One such analytics,
                 the Spatial Distance Histogram (SDH), is of vital
                 importance to scientific discovery. Recently,
                 algorithms for efficient SDH processing in large-scale
                 scientific databases have been proposed. These
                 algorithms adopt a recursive tree-traversing strategy
                 to process point-to-point distances in the visited tree
                 nodes in batches, thus require less time when compared
                 to the brute-force approach where all pairwise
                 distances have to be computed.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
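
%%% The spatial distance histogram (SDH) targeted by the entry above
%%% is, in its brute-force form, just a histogram of all pairwise
%%% distances. A minimal Python sketch of that O(n^2) baseline (the
%%% paper's dual-tree algorithm avoids enumerating pairs; the bucket
%%% width and points below are invented):

import math
from itertools import combinations

def sdh(points, w, nbuckets):
    # Count each pairwise distance into a bucket of width w.
    hist = [0] * nbuckets
    for p, q in combinations(points, 2):
        b = int(math.dist(p, q) // w)
        if b < nbuckets:
            hist[b] += 1
    return hist

pts = [(0, 0), (1, 0), (0, 2), (3, 4)]
print(sdh(pts, w=2.0, nbuckets=4))   # [1, 3, 2, 0]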

@Article{Fan:2011:DCR,
  author =       "Wenfei Fan and Hong Gao and Xibei Jia and Jianzhong Li
                 and Shuai Ma",
  title =        "Dynamic constraints for record matching",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "4",
  pages =        "495--520",
  month =        aug,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0206-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 16 19:01:00 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper investigates constraints for matching
                 records from unreliable data sources. (a) We introduce
                 a class of matching dependencies (mds) for specifying
                 the semantics of unreliable data. As opposed to static
                 constraints for schema design, mds are developed for
                 record matching, and are defined in terms of similarity
                 predicates and a dynamic semantics. (b) We identify a
                 special case of mds, referred to as relative candidate
                 keys (rcks), to determine what attributes to compare
                 and how to compare them when matching records across
                 possibly different relations. (c) We propose a
                 mechanism for inferring mds, a departure from
                 traditional implication analysis, such that when we
                 cannot match records by comparing attributes that
                 contain errors, we may still find matches by using
                 other, more reliable attributes.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
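
%%% A matching dependency of the kind described in the entry above can
%%% be read as: if two records are similar enough on some attributes,
%%% identify them on others. A toy Python sketch using difflib for the
%%% similarity predicate (the records, attribute names, and the 0.8
%%% threshold are invented; the paper's reasoning machinery for mds is
%%% not shown):

from difflib import SequenceMatcher

def similar(a, b, threshold=0.8):
    return SequenceMatcher(None, a.lower(), b.lower()).ratio() >= threshold

def md_applies(r1, r2):
    # md: zip equal AND name similar => records refer to one entity.
    return r1["zip"] == r2["zip"] and similar(r1["name"], r2["name"])

r1 = {"name": "Robert W. Smith", "zip": "02139", "phone": "555-0100"}
r2 = {"name": "Robert Smith",    "zip": "02139", "phone": None}
if md_applies(r1, r2):
    r2["phone"] = r2["phone"] or r1["phone"]   # borrow the reliable value
print(r2)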

@Article{Cheng:2011:FGQ,
  author =       "James Cheng and Yiping Ke and Ada Wai-Chee Fu and
                 Jeffrey Xu Yu",
  title =        "Fast graph query processing with a low-cost index",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "4",
  pages =        "521--539",
  month =        aug,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0212-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 16 19:01:00 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper studies the problem of processing
                 supergraph queries, that is, given a database
                 containing a set of graphs, find all the graphs in the
                 database of which the query graph is a supergraph.
                 Existing works usually construct an index and perform
                 a filtering-and-verification process, which still
                 requires many subgraph isomorphism tests. There are
                 also significant overheads in both index construction
                 and maintenance. In this paper, we design a graph
                 querying system that achieves both fast indexing and
                 efficient query processing. The index is constructed by
                 a simple but fast method of extracting the commonality
                 among the graphs, which does not involve any costly
                 operation such as graph mining.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mascetti:2011:PGS,
  author =       "Sergio Mascetti and Dario Freni and Claudio Bettini
                 and X. Sean Wang and Sushil Jajodia",
  title =        "Privacy in geo-social networks: proximity notification
                 with untrusted service providers and curious buddies",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "4",
  pages =        "541--566",
  month =        aug,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0213-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 16 19:01:00 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A major feature of the emerging geo-social networks is
                 the ability to notify a user when any of his friends
                 (also called buddies) happens to be geographically in
                 proximity. This proximity service is usually offered by
                 the network itself or by a third party service provider
                 (SP) using location data acquired from the users. This
                 paper provides a rigorous theoretical and experimental
                 analysis of the existing solutions for the location
                 privacy problem in proximity services. This is a
                 serious problem for users who do not trust the SP to
                 handle their location data and would only like to
                 release their location information in a generalized
                 form to participating buddies.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mohammed:2011:AMG,
  author =       "Noman Mohammed and Benjamin C. Fung and Mourad
                 Debbabi",
  title =        "Anonymity meets game theory: secure data integration
                 with malicious participants",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "4",
  pages =        "567--588",
  month =        aug,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-010-0214-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 16 19:01:00 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Data integration methods enable different data
                 providers to flexibly integrate their expertise and
                 deliver highly customizable services to their
                 customers. Nonetheless, combining data from different
                 sources could potentially reveal person-specific
                 sensitive information. In VLDBJ 2006, Jiang and Clifton
                 (Very Large Data Bases J (VLDBJ) 15(4):316--333, 2006)
                 propose a secure Distributed k-Anonymity (DkA)
                 framework for integrating two private data tables into a
                 k-anonymous table in which each private table is a
                 vertical partition on the same set of records. Their
                 proposed DkA framework is not scalable to large data
                 sets. Moreover, DkA is limited to a two-party scenario
                 and the parties are assumed to be semi-honest.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
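
%%% For reference, the k-anonymity property that the entry above builds
%%% on has a simple operational reading: every combination of
%%% quasi-identifier values must occur in at least k rows. A minimal
%%% Python check (the sample rows and column names are invented):

from collections import Counter

def is_k_anonymous(rows, qi_cols, k):
    counts = Counter(tuple(row[c] for c in qi_cols) for row in rows)
    return all(n >= k for n in counts.values())

rows = [
    {"age": "3*", "zip": "478**", "disease": "flu"},
    {"age": "3*", "zip": "478**", "disease": "cold"},
    {"age": "4*", "zip": "479**", "disease": "flu"},
]
print(is_k_anonymous(rows, ["age", "zip"], 2))   # False: the '4*' group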

@Article{Ahmad:2011:IAS,
  author =       "Mumtaz Ahmad and Ashraf Aboulnaga and Shivnath Babu
                 and Kamesh Munagala",
  title =        "Interaction-aware scheduling of report-generation
                 workloads",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "4",
  pages =        "589--615",
  month =        aug,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0217-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 16 19:01:00 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The typical workload in a database system consists of
                 a mix of multiple queries of different types that run
                 concurrently. Interactions among the different queries
                 in a query mix can have a significant impact on
                 database performance. Hence, optimizing database
                 performance requires reasoning about query mixes rather
                 than considering queries individually. Current database
                 systems lack the ability to do such reasoning. We
                 propose a new approach based on planning experiments
                 and statistical modeling to capture the impact of query
                 interactions. Our approach requires no prior
                 assumptions about the internal workings of the database
                 system or the nature and cause of query interactions,
                 making it portable across systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2011:EFF,
  author =       "Guoliang Li and Shengyue Ji and Chen Li and Jianhua
                 Feng",
  title =        "Efficient fuzzy full-text type-ahead search",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "4",
  pages =        "617--640",
  month =        aug,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0218-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 16 19:01:00 MDT 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Traditional information systems return answers after a
                 user submits a complete query. Users often feel ``left
                 in the dark'' when they have limited knowledge about
                 the underlying data and have to use a try-and-see
                 approach for finding information. A recent trend of
                 supporting autocomplete in these systems is a first
                 step toward solving this problem. In this paper, we
                 study a new information-access paradigm, called
                 ``type-ahead search'' in which the system searches the
                 underlying data ``on the fly'' as the user types in
                 query keywords. It extends autocomplete interfaces by
                 allowing keywords to appear at different places in the
                 underlying data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
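
%%% The fuzzy type-ahead search in the entry above hinges on prefix
%%% edit distance: a keyword matches if some prefix of it is within a
%%% small edit distance of the partial query typed so far. A minimal
%%% dynamic-programming sketch in Python (not the paper's indexed
%%% algorithm; the word list, query, and threshold are invented):

def prefix_edit_distance(query, word):
    # prev[j] = edit distance between query[:i] and word[:j].
    prev = list(range(len(word) + 1))
    for i, qc in enumerate(query, 1):
        cur = [i]
        for j, wc in enumerate(word, 1):
            cur.append(min(prev[j] + 1,                # delete qc
                           cur[j - 1] + 1,             # insert wc
                           prev[j - 1] + (qc != wc)))  # substitute
        prev = cur
    return min(prev)      # best match against any prefix of word

words = ["icde", "icdt", "sigmod", "vldb"]
print([w for w in words if prefix_edit_distance("vldv", w) <= 1])  # ['vldb']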

@Article{Guting:2011:SID,
  author =       "Ralf Hartmut G{\"u}ting and Nikos Mamoulis",
  title =        "Special issue on data management for mobile services",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "5",
  pages =        "641--642",
  month =        oct,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0250-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:25 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Popa:2011:INT,
  author =       "Iulian Sandu Popa and Karine Zeitouni and Vincent Oria
                 and Dominique Barth and Sandrine Vial",
  title =        "Indexing in-network trajectory flows",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "5",
  pages =        "643--669",
  month =        oct,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0236-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:25 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lange:2011:ERT,
  author =       "Ralph Lange and Frank D{\"u}rr and Kurt Rothermel",
  title =        "Efficient real-time trajectory tracking",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "5",
  pages =        "671--694",
  month =        oct,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0237-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:25 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Giannotti:2011:UCH,
  author =       "Fosca Giannotti and Mirco Nanni and Dino Pedreschi and
                 Fabio Pinelli and Chiara Renso and Salvatore Rinzivillo
                 and Roberto Trasarti",
  title =        "Unveiling the complexity of human mobility by querying
                 and mining massive trajectory data",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "5",
  pages =        "695--719",
  month =        oct,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0244-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:25 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Timko:2011:SSA,
  author =       "Igor Timko and Michael B{\"o}hlen and Johann Gamper",
  title =        "Sequenced spatiotemporal aggregation for coarse query
                 granularities",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "5",
  pages =        "721--741",
  month =        oct,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0247-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:25 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Guo:2011:DBS,
  author =       "Xi Guo and Baihua Zheng and Yoshiharu Ishikawa and
                 Yunjun Gao",
  title =        "Direction-based surrounder queries for mobile
                 recommendations",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "5",
  pages =        "743--766",
  month =        oct,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0241-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:25 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Trajcevski:2011:RCN,
  author =       "Goce Trajcevski and Roberto Tamassia and Isabel F.
                 Cruz and Peter Scheuermann and David Hartglass and
                 Christopher Zamierowski",
  title =        "Ranking continuous nearest neighbors for uncertain
                 trajectories",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "5",
  pages =        "767--791",
  month =        oct,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0249-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:25 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Rao:2011:STE,
  author =       "Weixiong Rao and Lei Chen and Ada Wai-Chee Fu",
  title =        "{STAIRS}: {Towards} efficient full-text filtering and
                 dissemination in {DHT} environments",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "6",
  pages =        "793--817",
  month =        dec,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0224-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:26 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Nowadays ``live'' content, such as weblog, wikipedia,
                 and news, is ubiquitous in the Internet. Providing
                 users with relevant content in a timely manner becomes
                 a challenging problem. Differing from Web search
                 technologies and RSS feeds/reader applications, this
                 paper envisions a personalized full-text content
                 filtering and dissemination system in a highly
                 distributed environment such as a Distributed Hash
                 Table (DHT) based Peer-to-Peer (P2P) Network. Users
                 subscribe to their interested content by specifying
                 input keywords and thresholds as filters. Then, content
                 is disseminated to those users having interest in it.
                 In the literature, full-text document publishing in
                 DHTs has suffered for a long time from the high cost of
                 forwarding a document to home nodes of all distinct
                 terms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lian:2011:STS,
  author =       "Xiang Lian and Lei Chen",
  title =        "Shooting top-$k$ stars in uncertain databases",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "6",
  pages =        "819--840",
  month =        dec,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0225-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:26 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Query processing in the uncertain database has played
                 an important role in many real-world applications due
                 to the wide existence of uncertain data. Although many
                 previous techniques can correctly handle precise data,
                 they are not directly applicable to the uncertain
                 scenario. In this article, we investigate and propose
                 a novel query, namely the probabilistic top-k star
                 (PTkS) query, which aims to retrieve the k objects in
                 an uncertain
                 database that are ``closest'' to a static/dynamic query
                 point, considering both distance and probability
                 aspects. In order to efficiently answer PTkS queries
                 with a static/moving query point, we propose effective
                 pruning methods to reduce the PTkS search space, which
                 can be seamlessly integrated into an efficient query
                 procedure.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Perez-Sorrosal:2011:ESC,
  author =       "Francisco Perez-Sorrosal and Marta Pati{\~n}o-Martinez
                 and Ricardo Jimenez-Peris and Bettina Kemme",
  title =        "Elastic {SI-Cache}: consistent and scalable caching in
                 multi-tier architectures",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "6",
  pages =        "841--865",
  month =        dec,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0228-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:26 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The new vision of cloud computing demands scalable,
                 available and autonomic software platforms in order to
                 deploy applications and services accessible anywhere
                 and anytime. Multi-tier architectures are an important
                 building block for many applications that are deployed
                 in the cloud. This paper presents a novel caching and
                 replication infrastructure that facilitates the
                 scalable and elastic deployment of multi-tier
                 architectures. Our Elastic SI-Cache is a novel
                 multi-version cache that attains high performance and
                 consistency in multi-tier systems. In contrast to most
                 existing caches, Elastic SI-Cache provides snapshot
                 isolation coherently across all tiers. Furthermore,
                 Elastic SI-Cache supports scalable replication of the
                 different tiers where replicas can be added or removed
                 dynamically as needed, making the cache amenable for
                 cloud computing environments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Moga:2011:USC,
  author =       "Alexandru Moga and Irina Botan and Nesime Tatbul",
  title =        "{UpStream}: storage-centric load management for
                 streaming applications with update semantics",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "6",
  pages =        "867--892",
  month =        dec,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0229-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:26 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper addresses the problem of minimizing the
                 staleness of query results for streaming applications
                 with update semantics under overload conditions.
                 Staleness is a measure of how out-of-date the results
                 are compared with the latest data arriving on the
                 input. Real-time streaming applications are subject to
                 overload due to unpredictably increasing data rates,
                 while in many of them, we observe that data streams and
                 queries in fact exhibit ``update semantics'' (i.e., the
                 latest input data are all that really matters when
                 producing a query result). Under such semantics,
                 overload will cause staleness to build up. The key to
                 avoid this is to exploit the update semantics of
                 applications as early as possible in the processing
                 pipeline.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
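
%%% Under the ``update semantics'' described in the entry above, only
%%% the newest tuple per key matters, so an overloaded operator can
%%% shed stale input by replacing a FIFO queue with a keyed buffer that
%%% holds one tuple per key. A caricature of that storage-centric idea
%%% in Python (not UpStream itself; the keys and values are invented):

class UpdateBuffer:
    def __init__(self):
        self.latest = {}                # key -> newest tuple only

    def push(self, key, value):
        self.latest[key] = value        # silently overwrites stale input

    def pop_all(self):
        batch, self.latest = self.latest, {}
        return batch

buf = UpdateBuffer()
for symbol, price in [("IBM", 10), ("HP", 20), ("IBM", 11)]:
    buf.push(symbol, price)
print(buf.pop_all())                    # {'IBM': 11, 'HP': 20}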

@Article{Wong:2011:MBR,
  author =       "Raymond Chi-Wing Wong and M. Tamer {\"O}zsu and Ada
                 Wai-Chee Fu and Philip S. Yu and Lian Liu and Yubao Liu",
  title =        "Maximizing bichromatic reverse nearest neighbor for
                 {Lp-norm} in two- and three-dimensional spaces",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "6",
  pages =        "893--919",
  month =        dec,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0230-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:26 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Bichromatic reverse nearest neighbor (BRNN) has been
                 extensively studied in spatial database literature. In
                 this paper, we study a related problem called MaxBRNN:
                 find an optimal region that maximizes the size of
                 BRNNs for the $L_p$-norm in two- and three-dimensional
                 spaces.
                 Such a problem has many real-life applications,
                 including the problem of finding a new server point
                 that attracts as many customers as possible by
                 proximity.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tiakas:2011:PPS,
  author =       "Eleftherios Tiakas and Apostolos N. Papadopoulos and
                 Yannis Manolopoulos",
  title =        "Progressive processing of subspace dominating
                 queries",
  journal =      j-VLDB-J,
  volume =       "20",
  number =       "6",
  pages =        "921--948",
  month =        dec,
  year =         "2011",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0231-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Dec 15 07:28:26 MST 2011",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A top-k dominating query reports the k items with the
                 highest domination score. Algorithms for efficient
                 processing of this query have been recently proposed in
                 the literature. Those methods, either index-based or
                 index-free, apply a series of pruning criteria toward
                 efficient processing. However, they are characterized
                 by several limitations, such as (1) they lack
                 progressiveness (they report the k best items at the
                 end of the processing), (2) they require a
                 multi-dimensional index or they build a grid-based
                 index on-the-fly, which suffers from performance
                 degradation, especially in high dimensionalities, and
                 (3) they do not support vertically decomposed data. In
                 this paper, we design efficient algorithms that can
                 handle any subset of the dimensions in a progressive
                 manner.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
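
%%% The domination score in the entry above counts how many points a
%%% point dominates (here: no worse in every dimension and strictly
%%% better in at least one, with smaller values preferred). A
%%% brute-force Python sketch of the query (the points are invented;
%%% this O(n^2) baseline is exactly what the paper's progressive
%%% algorithms improve on):

def dominates(p, q):
    return (all(a <= b for a, b in zip(p, q))
            and any(a < b for a, b in zip(p, q)))

def topk_dominating(points, k):
    scores = [(sum(dominates(p, q) for q in points), p) for p in points]
    return sorted(scores, reverse=True)[:k]

pts = [(1, 2), (2, 1), (3, 3), (4, 1), (2, 4)]
print(topk_dominating(pts, 2))   # [(3, (2, 1)), (2, (1, 2))]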

@Article{Mueller:2012:SNF,
  author =       "Rene Mueller and Jens Teubner and Gustavo Alonso",
  title =        "Sorting networks on {FPGAs}",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "1",
  pages =        "1--23",
  month =        feb,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0232-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jan 31 06:48:57 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Computer architectures are quickly changing toward
                 heterogeneous many-core systems. Such a trend opens up
                 interesting opportunities but also raises immense
                 challenges since the efficient use of heterogeneous
                 many-core systems is not a trivial problem.
                 Software-configurable microprocessors and FPGAs add
                 further diversity but also increase complexity. In this
                 paper, we explore the use of sorting networks on
                 field-programmable gate arrays (FPGAs). FPGAs are very
                 versatile in terms of how they can be used and can also
                 be added as additional processing units in standard CPU
                 sockets. Our results indicate that efficient usage of
                 FPGAs involves non-trivial aspects such as having the
                 right computation model (a sorting network in this
                 case); a careful implementation that balances all the
                 design constraints in an FPGA; and the proper
                 integration strategy to link the FPGA to the rest of
                 the system.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
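
%%% A sorting network of the kind the entry above maps onto FPGAs is a
%%% fixed, data-independent pattern of compare-exchange operations.
%%% The Python sketch below emulates Batcher's bitonic network for
%%% power-of-two input sizes (software emulation only; the paper's
%%% point is the hardware realization):

def bitonic_sort(a, lo=0, n=None, ascending=True):
    if n is None:
        n = len(a)                         # must be a power of two
    if n > 1:
        m = n // 2
        bitonic_sort(a, lo, m, True)       # build an ascending half...
        bitonic_sort(a, lo + m, m, False)  # ...and a descending half
        bitonic_merge(a, lo, n, ascending)

def bitonic_merge(a, lo, n, ascending):
    if n > 1:
        m = n // 2
        for i in range(lo, lo + m):        # one column of comparators
            if (a[i] > a[i + m]) == ascending:
                a[i], a[i + m] = a[i + m], a[i]
        bitonic_merge(a, lo, m, ascending)
        bitonic_merge(a, lo + m, m, ascending)

data = [7, 3, 6, 2, 8, 1, 5, 4]
bitonic_sort(data)
print(data)                                # [1, 2, 3, 4, 5, 6, 7, 8]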

@Article{Georgoulas:2012:DSE,
  author =       "Konstantinos Georgoulas and Yannis Kotidis",
  title =        "Distributed similarity estimation using derived
                 dimensions",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "1",
  pages =        "25--50",
  month =        feb,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0233-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jan 31 06:48:57 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Computing the similarity between data objects is a
                 fundamental operation for many distributed applications
                 such as those on the World Wide Web, in Peer-to-Peer
                 networks, or even in Sensor Networks. In our work, we
                 provide a framework based on Random Hyperplane
                 Projection (RHP) that permits continuous computation of
                 similarity estimates (using the cosine similarity or
                 the correlation coefficient as the preferred similarity
                 metric) between data descriptions that are streamed
                 from remote sites. These estimates are computed at a
                 monitoring node, without the need for transmitting the
                 actual data values. The original RHP framework is data
                 agnostic and works for arbitrary data sets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
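
%%% The random hyperplane projection (RHP) scheme in the entry above
%%% admits a compact illustration: each vector is summarized by the
%%% sign pattern of k random projections, and the fraction of
%%% disagreeing bits estimates the angle between two vectors, hence
%%% their cosine similarity. A NumPy sketch (the dimension, k, and the
%%% vectors are invented; the paper's derived-dimensions refinement is
%%% not shown):

import numpy as np

rng = np.random.default_rng(0)

def rhp_signature(x, planes):
    return planes @ x >= 0.0               # k sign bits

def estimated_cosine(sig_a, sig_b):
    frac = np.mean(sig_a != sig_b)         # Hamming fraction
    return np.cos(np.pi * frac)            # angle estimate -> cosine

d, k = 64, 512
planes = rng.standard_normal((k, d))       # shared by all remote sites
x, y = rng.standard_normal(d), rng.standard_normal(d)
true_cos = (x @ y) / (np.linalg.norm(x) * np.linalg.norm(y))
print(true_cos,
      estimated_cosine(rhp_signature(x, planes), rhp_signature(y, planes)))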

@Article{Deutch:2012:TIT,
  author =       "Daniel Deutch and Tova Milo",
  title =        "Type inference and type checking for queries over
                 execution traces",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "1",
  pages =        "51--68",
  month =        feb,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0234-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jan 31 06:48:57 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We study here Type Inference and Type Checking for
                 queries over the execution traces of Business
                 Processes. We define formal models for such execution
                 traces, allowing us to capture various realistic scenarios
                 of partial information about these traces. We then
                 define corresponding notions of types, and the problems
                 of type inference and type checking in this context. We
                 further provide a comprehensive study of the
                 decidability and complexity of these problems, in
                 various cases, and suggest efficient algorithms where
                 possible.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cheema:2012:CRN,
  author =       "Muhammad Aamir Cheema and Wenjie Zhang and Xuemin Lin
                 and Ying Zhang and Xuefei Li",
  title =        "Continuous reverse $k$ nearest neighbors queries in
                 {Euclidean} space and in spatial networks",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "1",
  pages =        "69--95",
  month =        feb,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0235-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jan 31 06:48:57 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we study the problem of continuous
                 monitoring of reverse k nearest neighbors queries in
                 Euclidean space as well as in spatial networks.
                 Existing techniques are sensitive to object and query
                 movement. For example, the results of a query must be
                 recomputed whenever the query changes its location. We
                 present a framework for continuous reverse k nearest
                 neighbor (RkNN) queries that assigns each object and
                 query a safe region such that the expensive
                 recomputation is not required as long as the query and
                 objects remain in their respective safe regions. This
                 significantly reduces the computation cost. As a
                 byproduct, our framework also reduces the
                 communication cost in client--server architectures
                 because an object does not report its location to the
                 server unless it leaves its safe region or the server
                 sends a location update request.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
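
%%% The safe-region idea in the entry above: a moving object stays
%%% silent while inside its assigned region, so the server recomputes
%%% an RkNN answer only on the rare boundary crossings. A minimal
%%% client-side Python sketch with a circular region (real safe
%%% regions are derived from the query and data geometry, not from a
%%% fixed radius as assumed here):

import math

class MonitoredObject:
    def __init__(self, pos, radius):
        self.center, self.radius = pos, radius   # assigned safe region

    def move(self, pos):
        if math.dist(pos, self.center) > self.radius:
            self.center = pos   # report: server recomputes, reassigns
            return True
        return False            # silent: result guaranteed unchanged

obj = MonitoredObject((0.0, 0.0), radius=5.0)
print(obj.move((1.0, 2.0)))     # False -> no message to the server
print(obj.move((7.0, 2.0)))     # True  -> location update sent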

@Article{Zou:2012:APM,
  author =       "Lei Zou and Lei Chen and M. Tamer {\"O}zsu and Dongyan
                 Zhao",
  title =        "Answering pattern match queries in large graph
                 databases via graph embedding",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "1",
  pages =        "97--120",
  month =        feb,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0238-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jan 31 06:48:57 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The growing popularity of graph databases has
                 generated interesting data management problems, such as
                 subgraph search, shortest path query, reachability
                 verification, and pattern matching. Among these, a
                 pattern match query is more flexible compared with a
                 subgraph search and more informative compared with a
                 shortest path or a reachability query. In this paper,
                 we address distance-based pattern match queries over a
                 large data graph G. Due to the huge search space, we
                 adopt a filter-and-refine framework to answer a pattern
                 match query over a large graph. We first find a set of
                 candidate matches by a graph embedding technique and
                 then evaluate these to find the exact matches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hartmann:2012:DES,
  author =       "Sven Hartmann and Markus Kirchberg and Sebastian
                 Link",
  title =        "Design by example for {SQL} table definitions with
                 functional dependencies",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "1",
  pages =        "121--144",
  month =        feb,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0239-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jan 31 06:48:57 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A database is C-Armstrong for a given set of
                 constraints in a class C if it satisfies every
                 constraint of the set and violates every constraint in
                 C not implied by the set. Therefore, Armstrong
                 databases are test data that perfectly illustrate the
                 current perceptions about the semantics of a schema. We
                 extend the existing theory of Armstrong relations to a
                 toolbox of Armstrong tables. That is, we investigate
                 structural and computational properties of Armstrong
                 tables for the class of functional dependencies (FDs)
                 over SQL tables. Relations are special instances of SQL
                 tables with no duplicate rows and no null value
                 occurrences.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
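
%%% Underlying the Armstrong tables in the entry above is the ordinary
%%% satisfaction test for a functional dependency: a table satisfies
%%% X -> Y iff no two rows agree on X but differ on Y. A small Python
%%% check (the sample table is invented; null values and SQL duplicate
%%% rows, which the paper treats carefully, are ignored here):

def satisfies_fd(rows, X, Y):
    seen = {}
    for row in rows:
        key = tuple(row[a] for a in X)
        val = tuple(row[a] for a in Y)
        if seen.setdefault(key, val) != val:
            return False        # two rows agree on X, differ on Y
    return True

t = [
    {"emp": "ann", "dept": "db", "city": "ny"},
    {"emp": "bob", "dept": "db", "city": "ny"},
    {"emp": "bob", "dept": "ai", "city": "sf"},
]
print(satisfies_fd(t, ["dept"], ["city"]))   # True
print(satisfies_fd(t, ["emp"], ["dept"]))    # False: 'bob' is a witness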

@Article{Guravannavar:2012:WSO,
  author =       "Ravindra Guravannavar and S. Sudarshan and Ajit A.
                 Diwan and Ch. Sobhan Babu",
  title =        "Which sort orders are interesting?",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "1",
  pages =        "145--165",
  month =        feb,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0240-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jan 31 06:48:57 MST 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Sort orders play an important role in query
                 evaluation. Algorithms that rely on sorting are widely
                 used to implement joins, grouping, duplicate
                 elimination and other set operations. The notion of
                 interesting orders has allowed query optimizers to
                 consider plans that could be locally sub-optimal, but
                 produce ordered output beneficial for other operators,
                 and thus be part of a globally optimal plan. However,
                 the number of interesting orders for most operators is
                 factorial in the number of attributes involved.
                 Optimizer implementations use heuristics to prune the
                 number of interesting orders, but the quality of the
                 heuristics is unclear. Increasingly complex decision
                 support queries and increasing use of query-covering
                 indices, which provide multiple alternative sort orders
                 for relations, motivate us to better address the
                 problem of choosing interesting orders.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Atzeni:2012:SIB,
  author =       "Paolo Atzeni and Elisa Bertino and Masaru Kitsuregawa
                 and Kian-Lee Tan",
  title =        "Special issue: best papers of {VLDB 2010}",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "2",
  pages =        "167--168",
  month =        apr,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0267-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 23 08:02:21 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bu:2012:HAL,
  author =       "Yingyi Bu and Bill Howe and Magdalena Balazinska and
                 Michael D. Ernst",
  title =        "The {HaLoop} approach to large-scale iterative data
                 analysis",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "2",
  pages =        "169--190",
  month =        apr,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0269-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 23 08:02:21 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The growing demand for large-scale data mining and
                 data analysis applications has led both industry and
                 academia to design new types of highly scalable
                 data-intensive computing platforms. MapReduce has
                 enjoyed particular success. However, MapReduce lacks
                 built-in support for iterative programs, which arise
                 naturally in many applications including data mining,
                 web ranking, graph analysis, and model fitting. This
                 paper (This is an extended version of the VLDB 2010
                 paper ``HaLoop: Efficient Iterative Data Processing on
                 Large Clusters'' PVLDB 3(1):285--296, 2010.) presents
                 HaLoop, a modified version of the Hadoop MapReduce
                 framework, that is designed to serve these
                 applications. HaLoop allows iterative applications to
                 be assembled from existing Hadoop programs without
                 modification, and significantly improves their
                 efficiency by providing inter-iteration caching
                 mechanisms and a loop-aware scheduler to exploit these
                 caches. HaLoop retains the fault-tolerance properties
                 of MapReduce through automatic cache recovery and task
                 re-execution. We evaluated HaLoop on a variety of real
                 applications and real datasets. Compared with Hadoop,
                 on average, HaLoop improved runtimes by a factor of
                 1.85 and shuffled only 4\% as much data between
                 mappers and reducers in the applications that we
                 tested.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Alexe:2012:MCI,
  author =       "Bogdan Alexe and Mauricio Hern{\'a}ndez and Lucian
                 Popa and Wang-Chiew Tan",
  title =        "{MapMerge}: correlating independent schema mappings",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "2",
  pages =        "191--211",
  month =        apr,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0264-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 23 08:02:21 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "One of the main steps toward integration or exchange
                 of data is to design the mappings that describe the
                 (often complex) relationships between the source
                 schemas or formats and the desired target schema. In
                 this paper, we introduce a new operator, called
                 MapMerge, that can be used to correlate multiple,
                 independently designed schema mappings of smaller scope
                 into larger schema mappings. This allows a more modular
                 construction of complex mappings from various types of
                 smaller mappings such as schema correspondences
                 produced by a schema matcher or pre-existing mappings
                 that were designed by either a human user or via
                 mapping tools. In particular, the new operator also
                 enables a new ``divide-and-merge'' paradigm for mapping
                 creation, where the design is divided (on purpose) into
                 smaller components that are easier to create and
                 understand and where MapMerge is used to automatically
                 generate a meaningful overall mapping. We describe our
                 MapMerge algorithm and demonstrate the feasibility of
                 our implementation on several real and synthetic
                 mapping scenarios. In our experiments, we make use of a
                 novel similarity measure between two database instances
                 with different schemas that quantifies the preservation
                 of data associations. We show experimentally that
                 MapMerge improves the quality of the schema mappings,
                 by significantly increasing the similarity between the
                 input source instance and the generated target
                 instance. Finally, we provide a new algorithm that
                 combines MapMerge with schema mapping composition to
                 correlate flows of schema mappings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fan:2012:TCF,
  author =       "Wenfei Fan and Jianzhong Li and Shuai Ma and Nan Tang
                 and Wenyuan Yu",
  title =        "Towards certain fixes with editing rules and master
                 data",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "2",
  pages =        "213--238",
  month =        apr,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0253-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 23 08:02:21 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A variety of integrity constraints have been studied
                 for data cleaning. While these constraints can detect
                 the presence of errors, they fall short of guiding us
                 to correct the errors. Indeed, data repairing based on
                 these constraints may not find certain fixes that are
                 guaranteed correct, and worse still, may even introduce
                 new errors when attempting to repair the data. We
                 propose a method for finding certain fixes, based on
                 master data, a notion of certain regions, and a class
                 of editing rules. A certain region is a set of
                 attributes that are assured correct by the users. Given
                 a certain region and master data, editing rules tell us
                 what attributes to fix and how to update them. We show
                 how the method can be used in data monitoring and
                 enrichment. We also develop techniques for reasoning
                 about editing rules, to decide whether they lead to a
                 unique fix and whether they are able to fix all the
                 attributes in a tuple, relative to master data and a
                 certain region. Furthermore, we present a framework and
                 an algorithm to find certain fixes, by interacting with
                 the users to ensure that one of the certain regions is
                 correct. We experimentally verify the effectiveness and
                 scalability of the algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Johnson:2012:SWA,
  author =       "Ryan Johnson and Ippokratis Pandis and Radu Stoica and
                 Manos Athanassoulis and Anastasia Ailamaki",
  title =        "Scalability of write-ahead logging on multicore and
                 multisocket hardware",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "2",
  pages =        "239--263",
  month =        apr,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0260-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 23 08:02:21 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The shift to multi-core and multi-socket hardware
                 brings new challenges to database systems, as the
                 software parallelism determines performance. Even
                 though database systems traditionally accommodate
                 simultaneous requests, a multitude of synchronization
                 barriers serialize execution. Write-ahead logging is a
                 fundamental, omnipresent component in ARIES-style
                 concurrency and recovery, and one of the most important
                 yet-to-be-addressed potential bottlenecks, especially
                 in OLTP workloads making frequent small changes to
                 data. In this paper, we identify four logging-related
                 impediments to database system scalability. Each issue
                 challenges a different level of the software
                 architecture: (a) the high volume of small-sized I/O
                 requests may saturate the disk, (b) transactions hold
                 locks while waiting for the log flush, (c) extensive
                 context switching overwhelms the OS scheduler with
                 threads executing log I/Os, and (d) contention appears
                 as transactions serialize accesses to in-memory log
                 data structures. We demonstrate these problems and
                 address them with techniques that, when combined,
                 comprise a holistic, scalable approach to logging. Our
                 solution achieves a 20--69\% speedup over a modern
                 database system when running log-intensive workloads,
                 such as the TPC-B and TATP benchmarks, in a
                 single-socket multiprocessor server. Moreover, it
                 achieves log insert throughput over 2.2 GB/s for small
                 log records on the single-socket server, roughly 20
                 times higher than the traditional way of accessing the
                 log using a single mutex. Furthermore, we investigate
                 techniques on scaling the performance of logging to
                 multi-socket servers. We present a set of optimizations
                 which partly ameliorate the latency penalty that comes
                 with multi-socket hardware, and then we investigate the
                 feasibility of applying a distributed log buffer design
                 at the socket level.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
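
%%% Of the four bottlenecks listed in the Johnson et al. abstract
%%% above, (d) is contention on the in-memory log buffer.  A minimal
%%% Python sketch of the usual remedy (hold the mutex only to reserve
%%% buffer space, then copy the payload outside it; class and method
%%% names are hypothetical, not the paper's implementation):
%%%
%%%     import threading
%%%
%%%     class LogBuffer:
%%%         def __init__(self, size=1 << 20):
%%%             self.buf = bytearray(size)
%%%             self.tail = 0
%%%             self.lock = threading.Lock()
%%%
%%%         def insert(self, record: bytes) -> int:
%%%             with self.lock:          # tiny critical section:
%%%                 lsn = self.tail      # reserve space only
%%%                 self.tail += len(record)
%%%             # bulk copy proceeds without holding the mutex
%%%             self.buf[lsn:lsn + len(record)] = record
%%%             return lsn               # log sequence number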

@Article{Chen:2012:AUP,
  author =       "Su Chen and Beng Chin Ooi and Zhenjie Zhang",
  title =        "An adaptive updating protocol for reducing moving
                 object database workload",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "2",
  pages =        "265--286",
  month =        apr,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0257-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Apr 23 08:02:21 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In the last decade, spatio-temporal database research
                 focuses on the design of effective and efficient
                 indexing structures in support of location-based
                 queries such as predictive range queries and nearest
                 neighbor queries. While a variety of indexing
                 techniques have been proposed to accelerate the
                 processing of updates and queries, not much attention
                 has been paid to the updating protocol, which is
                 another important factor affecting the system
                 performance. In this paper, we propose a generic and
                 adaptive updating protocol for moving object databases
                 that requires fewer updates between objects and the
                 database server, thereby reducing the overall workload
                 of the system. In contrast to the approach adopted by
                 most conventional moving object database systems where
                 the exact locations and velocities last disclosed are
                 used to predict their motions, we propose the concept
                 of the spatio-temporal safe region to approximate possible
                 future locations. Spatio-temporal safe regions provide
                 larger space of tolerance for moving objects, freeing
                 them from location and velocity updates as long as the
                 errors remain predictable in the database. To answer
                 predictive queries accurately, the server is allowed to
                 probe the latest status of objects when their safe
                 regions are inadequate in returning the exact query
                 results. Spatio-temporal safe regions are calculated
                 and optimized by the database server with two
                 contradictory objectives: reducing update workload
                 while guaranteeing query accuracy and efficiency. To
                 achieve this, we propose a cost model that estimates
                 the composition of active and passive updates based on
                 historical motion records and query distribution.
                 Further performance improvements can be obtained by
                 cutting more updates from the clients when the users of
                 the system are comfortable with incomplete but
                 accuracy-bounded query results. We have conducted extensive
                 experiments to evaluate our proposal on a variety of
                 popular indexing structures. The results confirm the
                 viability, robustness, accuracy and efficiency of our
                 proposed protocol.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
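
%%% The updating protocol above lets a moving object stay silent
%%% while it remains inside a server-assigned safe region.  A toy
%%% client-side check, assuming a circular region (the paper's
%%% spatio-temporal safe regions are more elaborate; names are
%%% hypothetical):
%%%
%%%     import math
%%%
%%%     def needs_update(pos, center, radius):
%%%         # Report to the server only when the object has left its
%%%         # safe region; otherwise no message is sent at all.
%%%         return math.hypot(pos[0] - center[0],
%%%                           pos[1] - center[1]) > radius
%%%
%%% The server may still probe an object directly when a predictive
%%% query cannot be answered exactly from the safe regions alone.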

@Article{Fusco:2012:RTC,
  author =       "Francesco Fusco and Michail Vlachos and Marc Ph.
                 Stoecklin",
  title =        "Real-time creation of bitmap indexes on streaming
                 network data",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "3",
  pages =        "287--307",
  month =        jun,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0242-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 26 17:39:07 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "High-speed archival and indexing solutions of
                 streaming traffic are growing in importance for
                 applications such as monitoring, forensic analysis, and
                 auditing. Many large institutions require fast
                 solutions to support expedient analysis of historical
                 network data, particularly in case of security
                 breaches. However, ``turning back the clock'' is not a
                 trivial task. The first major challenge is that such a
                 technology needs to support data archiving under
                 extremely high-speed insertion rates. Moreover, the
                 archives created have to be stored in a compressed
                 format that is still amenable to indexing and search.
                 The above requirements make general-purpose databases
                 unsuitable for this task and dedicated solutions are
                 required. This work describes a solution for high-speed
                 archival storage, indexing, and data querying on
                 network flow information. We make the two following
                 important contributions: (a) we propose a novel
                 compressed bitmap index approach that significantly
                 reduces both CPU load and disk consumption and, (b) we
                 introduce an online stream reordering mechanism that
                 further reduces space requirements and improves the
                 time for data retrieval. The reordering methodology is
                 based on the principles of locality-sensitive hashing
                 (LSH) and is also of interest for other bitmap creation
                 techniques. Because of the synergy of these two
                 components, our solution can sustain data insertion
                 rates that reach 500,000--1 million records per
                 second. To put these numbers into perspective, typical
                 commercial network flow solutions can currently process
                 20,000--60,000 flows per second. In addition, our
                 system offers interactive query response times that
                 enable administrators to perform complex analysis tasks
                 on the fly. Our technique is directly amenable to
                 parallel execution, allowing its application in domains
                 that are challenged by large volumes of historical
                 measurement data, such as network auditing, traffic
                 behavior analysis, and large-scale data visualization
                 in service provider networks.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gordevicus:2012:PTA,
  author =       "Juozas Gordevi{\v{c}}us and Johann Gamper and Michael
                 B{\"o}hlen",
  title =        "Parsimonious temporal aggregation",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "3",
  pages =        "309--332",
  month =        jun,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0243-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 26 17:39:07 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Temporal aggregation is an important operation in
                 temporal databases, and different variants thereof have
                 been proposed. In this paper, we introduce a novel
                 temporal aggregation operator, termed parsimonious
                 temporal aggregation (PTA), that overcomes major
                 limitations of existing approaches. PTA takes the
                 result of instant temporal aggregation (ITA) of size
                 $n$, which might be up to twice as large as the argument
                 relation, and merges similar tuples until a given error
                 ($\epsilon$) or size ($c$) bound is reached. The new
                 operator is data-adaptive and allows the user to
                 control the trade-off between the result size and the
                 error introduced by merging. For the precise evaluation
                 of PTA queries, we propose two
                 dynamic-programming-based algorithms for size- and
                 error-bounded queries, respectively, with a worst-case
                 complexity that is quadratic in $n$. We present two
                 optimizations that take advantage of temporal gaps and
                 different aggregation groups and achieve a linear
                 runtime in experiments with real-world data. For the
                 quick computation of an approximate PTA answer, we
                 propose an efficient greedy merging strategy with a
                 precision that is upper bounded by $O(\log n)$. We
                 present two algorithms that implement this strategy and
                 begin to merge as ITA tuples are produced. They require
                 $O(n \log (c + \beta))$ time and $O(c + \beta)$ space,
                 where $\beta$ is the size of a read-ahead buffer and is
                 typically very small. An empirical evaluation on
                 real-world and synthetic data shows that PTA
                 considerably reduces the size of the aggregation
                 result, yet introducing only small errors. The greedy
                 algorithms are scalable for large data sets and
                 introduce less error than other approximation
                 techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
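
%%% The greedy strategy sketched in the abstract merges similar
%%% adjacent ITA tuples until a size bound is met.  A deliberately
%%% simplified Python sketch of a size-bounded merge (full rescan per
%%% step instead of the paper's read-ahead buffer; tuples are
%%% (start, end, value) triples):
%%%
%%%     def greedy_pta(tuples, c):
%%%         out = list(tuples)
%%%         while len(out) > c:
%%%             # merge the adjacent pair whose values differ least
%%%             i = min(range(len(out) - 1),
%%%                     key=lambda j: abs(out[j][2] - out[j + 1][2]))
%%%             a, b = out[i], out[i + 1]
%%%             out[i:i + 2] = [(a[0], b[1], (a[2] + b[2]) / 2)]
%%%         return out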

@Article{Hore:2012:SMR,
  author =       "Bijit Hore and Sharad Mehrotra and Mustafa Canim and
                 Murat Kantarcioglu",
  title =        "Secure multidimensional range queries over outsourced
                 data",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "3",
  pages =        "333--358",
  month =        jun,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0245-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 26 17:39:07 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we study the problem of supporting
                 multidimensional range queries on encrypted data. The
                 problem is motivated by secure data outsourcing
                 applications where a client may store his/her data on a
                 remote server in encrypted form and wants to execute
                 queries using the server's computational capabilities. The
                 solution approach is to compute a secure indexing tag
                 of the data by applying bucketization (a generic form
                 of data partitioning) which prevents the server from
                 learning exact values but still allows it to check if a
                 record satisfies the query predicate. Queries are
                 evaluated in an approximate manner where the returned
                 set of records may contain some false positives. These
                 records then need to be weeded out by the client, which
                 constitutes the computational overhead of our scheme. We
                 develop a bucketization procedure for answering
                 multidimensional range queries on multidimensional
                 data. For a given bucketization scheme, we derive cost
                 and disclosure-risk metrics that estimate the client's
                 computational overhead and disclosure risk,
                 respectively. Given a multidimensional dataset, its
                 bucketization is posed as an optimization problem where
                 the goal is to minimize the risk of disclosure while
                 keeping query cost (client's computational overhead)
                 below a certain user-specified threshold value. We
                 provide a tunable data bucketization algorithm that
                 allows the data owner to control the trade-off between
                 disclosure risk and cost. We also study the trade-off
                 characteristics through an extensive set of experiments
                 on real and synthetic data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
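
%%% The bucketization idea above can be miniaturized as follows: the
%%% server stores (bucket tag, ciphertext) pairs, filters on tags,
%%% and the client weeds out false positives after decryption.  A
%%% sketch with equi-width buckets (the paper optimizes the bucket
%%% layout; all names here are hypothetical):
%%%
%%%     def tag(value, width=10):
%%%         return value // width        # server sees only this tag
%%%
%%%     def server_filter(rows, lo, hi, width=10):
%%%         # every row whose bucket could intersect [lo, hi];
%%%         # may contain false positives, never false negatives
%%%         return [c for t, c in rows
%%%                 if tag(lo, width) <= t <= tag(hi, width)]
%%%
%%%     def client_refine(candidates, decrypt, lo, hi):
%%%         values = (decrypt(c) for c in candidates)
%%%         return [v for v in values if lo <= v <= hi]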

@Article{Hose:2012:SSP,
  author =       "Katja Hose and Akrivi Vlachou",
  title =        "A survey of skyline processing in highly distributed
                 environments",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "3",
  pages =        "359--384",
  month =        jun,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0246-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 26 17:39:07 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "During the last decades, data management and storage
                 have become increasingly distributed. Advanced query
                 operators, such as skyline queries, are necessary in
                 order to help users to handle the huge amount of
                 available data by identifying a set of interesting data
                 objects. Skyline query processing in highly distributed
                 environments poses inherent challenges and demands and
                 requires non-traditional techniques due to the
                 distribution of content and the lack of global
                 knowledge. This paper surveys this interesting and
                 still evolving research area, so that readers can
                 easily obtain an overview of the state-of-the-art. We
                 outline the objectives and the main principles that any
                 distributed skyline approach has to fulfill, leading to
                 useful guidelines for developing algorithms for
                 distributed skyline processing. We review in detail
                 existing approaches that are applicable for highly
                 distributed environments, clarify the assumptions of
                 each approach, and provide a comparative performance
                 analysis. Moreover, we study the skyline variants each
                 approach supports. Our analysis leads to a taxonomy of
                 existing approaches. Finally, we present interesting
                 research topics on distributed skyline computation that
                 have not yet been explored.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gong:2012:EMU,
  author =       "Jian Gong and Reynold Cheng and David W. Cheung",
  title =        "Efficient management of uncertainty in {XML} schema
                 matching",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "3",
  pages =        "385--409",
  month =        jun,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0248-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 26 17:39:07 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Despite advances in machine learning technologies a
                 schema matching result between two database schemas
                 (e.g., those derived from COMA++) is likely to be
                 imprecise. In particular, numerous instances of
                 ``possible mappings'' between the schemas may be
                 derived from the matching result. In this paper, we
                 study problems related to managing possible mappings
                 between two heterogeneous XML schemas. First, we study
                 how to efficiently generate possible mappings for a
                 given schema matching task. While this problem can be
                 solved by existing algorithms, we show how to improve
                 the performance of the solution by using a
                 divide-and-conquer approach. Second, storing and
                 querying a large set of possible mappings can incur
                 large storage and evaluation overhead. For XML schemas,
                 we observe that their possible mappings often exhibit a
                 high degree of overlap. We hence propose a novel data
                 structure, called the block tree, to capture the
                 commonalities among possible mappings. The block tree
                 is useful for representing the possible mappings in a
                 compact manner and can be efficiently generated.
                 Moreover, it facilitates the evaluation of a
                 probabilistic twig query (PTQ), which returns the
                 non-zero probability that a fragment of an XML document
                 matches a given query. For users who are interested
                 only in answers with the $k$ highest probabilities, we
                 also propose the top-$k$ PTQ and present an efficient
                 solution for it. An extensive evaluation on real-world
                 data sets shows that our approaches significantly
                 improve the efficiency of generating, storing, and
                 querying possible mappings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cao:2012:SSA,
  author =       "Yu Cao and Ramadhana Bramandia and Chee-Yong Chan and
                 Kian-Lee Tan",
  title =        "Sort-sharing-aware query processing",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "3",
  pages =        "411--436",
  month =        jun,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0251-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Jun 26 17:39:07 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Many database applications require sorting a table (or
                 relation) over multiple sort orders. Some examples
                 include creation of multiple indices on a relation,
                 generation of multiple reports from a table, evaluation
                 of a complex query that involves multiple instances of
                 a relation, and batch processing of a set of queries.
                 In this paper, we study how to optimize multiple
                 sortings of a table. We investigate the correlation
                 between sort orders and exploit sort-sharing techniques
                 of reusing the (partial) work done to sort a table on a
                 particular order for another order. Specifically, we
                 introduce a novel and powerful evaluation technique,
                 called cooperative sorting, that enables sort sharing
                 between seemingly non-related sort orders.
                 Subsequently, given a specific set of sort orders, we
                 determine the best combination of various sort-sharing
                 techniques so as to minimize the total processing cost.
                 We also develop techniques to make a traditional query
                 optimizer extensible so that it will not miss the truly
                 cheapest execution plan with the sort-sharing (post-)
                 optimization turned on. We demonstrate the efficiency
                 of our ideas with a prototype implementation in
                 PostgreSQL and evaluate the performance using both
                 TPC-DS benchmark and synthetic data. Our experimental
                 results show significant performance improvement over
                 the traditional evaluation scheme.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
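
%%% The simplest instance of sort sharing reuses a sort on (a) to
%%% produce the order (a, b): only runs of equal a need re-sorting.
%%% A small Python sketch of that base case (cooperative sorting in
%%% the paper goes further, sharing work between non-prefix orders):
%%%
%%%     from itertools import groupby
%%%     from operator import itemgetter
%%%
%%%     def resort_on_ab(rows_sorted_on_a):
%%%         out = []
%%%         for _, run in groupby(rows_sorted_on_a, key=itemgetter(0)):
%%%             out.extend(sorted(run, key=itemgetter(1)))
%%%         return out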

@Article{Feng:2012:TJT,
  author =       "Jianhua Feng and Jiannan Wang and Guoliang Li",
  title =        "Trie-join: a trie-based method for efficient string
                 similarity joins",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "4",
  pages =        "437--461",
  month =        aug,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0252-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 20 14:56:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A string similarity join finds similar pairs between
                 two collections of strings. Many applications, e.g.,
                 data integration and cleaning, can significantly
                 benefit from an efficient string-similarity-join
                 algorithm. In this paper, we study string similarity
                 joins with edit-distance constraints. Existing methods
                 usually employ a filter-and-refine framework and suffer
                 from the following limitations: (1) they are
                 inefficient for data sets with short strings (average
                 string length no larger than 30); (2) they involve
                 large indexes; and (3) they are expensive when
                 supporting dynamic updates of data sets. To address these
                 problems, we propose a novel method called trie-join,
                 which can generate results efficiently with small
                 indexes. We use a trie structure to index the strings
                 and utilize the trie structure to efficiently find
                 similar string pairs based on subtrie pruning. We
                 devise efficient trie-join algorithms and pruning
                 techniques to achieve high performance. Our method can
                 be easily extended to support dynamic update of data
                 sets efficiently. We conducted extensive experiments on
                 four real data sets. Experimental results show that our
                 algorithms outperform state-of-the-art methods by an
                 order of magnitude on the data sets with short
                 strings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Augsten:2012:WGA,
  author =       "Nikolaus Augsten and Michael B{\"o}hlen and Curtis
                 Dyreson and Johann Gamper",
  title =        "Windowed $pq$-grams for approximate joins of
                 data-centric {XML}",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "4",
  pages =        "463--488",
  month =        aug,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0254-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 20 14:56:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In data integration applications, a join matches
                 elements that are common to two data sources. Since
                 elements are represented slightly different in each
                 source, an approximate join must be used to do the
                 matching. For XML data, most existing approximate join
                 strategies are based on some ordered tree matching
                 technique, such as the tree edit distance. In
                 data-centric XML, however, the sibling order is
                 irrelevant, and two elements should match even if their
                 subelement order varies. Thus, approximate joins for
                 data-centric XML must leverage unordered tree matching
                 techniques. This is computationally hard since the
                 algorithms cannot rely on a predefined sibling order.
                 In this paper, we give a solution for approximate joins
                 based on unordered tree matching. The core of our
                 solution is windowed $pq$-grams, which are small
                 subtrees of a specific shape. We develop an efficient
                 technique to generate windowed $pq$-grams in a
                 three-step process: sort the tree, extend the sorted
                 tree with dummy nodes, and decompose the extended tree
                 into windowed $pq$-grams. The windowed $pq$-gram
                 distance between two trees is the number of $pq$-grams
                 that are in one tree decomposition only. We show that
                 our distance is a pseudo-metric and empirically
                 demonstrate that it effectively approximates the
                 unordered tree edit distance. The approximate join
                 using windowed $pq$-grams
                 can be efficiently implemented as an equality join on
                 strings, which avoids the costly computation of the
                 distance between every pair of input trees. Experiments
                 with synthetic and real world data confirm the analytic
                 results and show the effectiveness and efficiency of
                 our technique.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhou:2012:ESM,
  author =       "Xiangmin Zhou and Xiaofang Zhou and Lei Chen and
                 Athman Bouguettaya",
  title =        "Efficient subsequence matching over large video
                 databases",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "4",
  pages =        "489--508",
  month =        aug,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0255-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 20 14:56:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Video similarity matching has broad applications such
                 as copyright detection, news tracking and commercial
                 monitoring, etc. Among these applications, one typical
                 task is to detect the local similarity between two
                 videos without the knowledge on positions and lengths
                 of each matched subclip pair. However, most studies so
                 far on video detection investigate the global
                 similarity between two short clips using a pre-defined
                 distance function. Although there are a few works on
                 video subsequence detection, all these proposals fail
                 to provide an effective query processing mechanism. In
                 this paper, we first generalize the problem of video
                 similarity matching. Then, a novel solution called
                 consistent keyframe matching (CKM) is proposed to solve
                 the problem of subsequence matching based on video
                 segmentation. CKM is designed with two goals: (1) good
                 scalability in terms of the query sequence length and
                 the size of video database and (2) fast video
                 subsequence matching in terms of processing time. Good
                 scalability is achieved by employing a batch query
                 paradigm, where keyframes sharing the same query space
                 are summarized and ordered. As such, the redundancy of
                 data access is eliminated, leading to much faster video
                 query processing. Fast subsequence matching is achieved
                 by comparing the keyframes of different video
                 sequences. Specifically, a keyframe matching graph is
                 first constructed and then divided into matched
                 candidate subgraphs. We have evaluated our proposed
                 approach over a very large real video database.
                 Extensive experiments demonstrate the effectiveness and
                 efficiency of our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yildirim:2012:GSI,
  author =       "Hilmi Yildirim and Vineet Chaoji and Mohammed J.
                 Zaki",
  title =        "{GRAIL}: a scalable index for reachability queries in
                 very large graphs",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "4",
  pages =        "509--534",
  month =        aug,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0256-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 20 14:56:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a large directed graph, rapidly answering
                 reachability queries between source and target nodes is
                 an important problem. Existing methods for reachability
                 tradeoff indexing time and space versus query time
                 performance. However, the biggest limitation of
                 existing methods is that they do not scale to very
                 large real-world graphs. We present a simple yet
                 scalable reachability index, called GRAIL, that is
                 based on the idea of randomized interval labeling and
                 that can effectively handle very large graphs. Based on
                 an extensive set of experiments, we show that while
                 more sophisticated methods work better on small graphs,
                 GRAIL is the only index that can scale to millions of
                 nodes and edges. GRAIL has linear indexing time and
                 space, and its query time ranges from constant to
                 linear in the graph order and size. Our reference
                 C++ implementations are open source and available for
                 download at http://www.code.google.com/p/grail/.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
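
%%% GRAIL's randomized interval labeling admits a compact sketch: a
%%% post-order DFS gives each node u the interval [lo(u), rank(u)],
%%% where lo(u) is the smallest rank among u's reachable set, and
%%% interval containment is a necessary condition for reachability.
%%% A single-pass Python sketch for a DAG (the paper uses several
%%% randomized passes plus a pruned DFS for the positive cases):
%%%
%%%     def grail_label(graph):          # graph: node -> successors
%%%         label, counter = {}, [0]
%%%         def visit(u):
%%%             if u in label:           # acyclic, so safe to memoize
%%%                 return label[u][0]
%%%             lows = [visit(v) for v in graph.get(u, ())]
%%%             counter[0] += 1
%%%             rank = counter[0]
%%%             label[u] = (min(lows) if lows else rank, rank)
%%%             return label[u][0]
%%%         for node in list(graph):
%%%             visit(node)
%%%         return label
%%%
%%%     def may_reach(label, u, v):
%%%         # False means "definitely unreachable"; True means a
%%%         # guided DFS must confirm.
%%%         (lu, hu), (lv, hv) = label[u], label[v]
%%%         return lu <= lv and hv <= hu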

@Article{Xu:2012:EES,
  author =       "Jia Xu and Zhenjie Zhang and Anthony K. Tung and Ge
                 Yu",
  title =        "Efficient and effective similarity search over
                 probabilistic data based on {Earth Mover's Distance}",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "4",
  pages =        "535--559",
  month =        aug,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0258-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 20 14:56:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Advances in geographical tracking, multimedia
                 processing, information extraction, and sensor networks
                 have created a deluge of probabilistic data. While
                 similarity search is an important tool to support the
                 manipulation of probabilistic data, it raises new
                 challenges to traditional relational databases. The
                 problem stems from the limited effectiveness of the
                 distance metrics employed by existing database systems.
                 On the other hand, several more complicated distance
                 operators have proven their values for better
                 distinguishing ability in specific probabilistic
                 domains. In this paper, we discuss the similarity
                 search problem with respect to Earth Mover's Distance
                 (EMD). EMD is the most successful distance metric for
                 probability distribution comparison but is an expensive
                 operator as it has cubic time complexity. We present a
                 new database indexing approach to answer EMD-based
                 similarity queries, including range queries and
                 $k$-nearest neighbor queries on probabilistic data. Our
                 solution utilizes primal-dual theory from linear
                 programming and employs a group of B$^+$ trees for
                 effective candidate pruning. We also apply our
                 filtering technique to the processing of continuous
                 similarity queries, especially with applications to
                 frame copy detection in real-time videos. Extensive
                 experiments show that our proposals dramatically
                 improve the usefulness and scalability of probabilistic
                 data management.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
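
%%% General EMD requires solving a linear program, but the special
%%% case of one-dimensional histograms with unit ground distance
%%% collapses to an L1 distance between cumulative sums, which makes
%%% the metric concrete.  A tiny sketch of that special case (not
%%% the paper's primal-dual filtering or B$^+$-tree machinery):
%%%
%%%     def emd_1d(p, q):
%%%         # assumes equal total mass in p and q
%%%         emd = cum = 0.0
%%%         for pi, qi in zip(p, q):
%%%             cum += pi - qi   # mass still owed to the right
%%%             emd += abs(cum)
%%%         return emd
%%%
%%%     # emd_1d([0, 1, 0], [1, 0, 0]) == 1.0: one unit moved one bin.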

@Article{Zhang:2012:HOA,
  author =       "Rui Zhang and Jianzhong Qi and Dan Lin and Wei Wang
                 and Raymond Chi-Wing Wong",
  title =        "A highly optimized algorithm for continuous
                 intersection join queries over moving objects",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "4",
  pages =        "561--586",
  month =        aug,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0259-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 20 14:56:19 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given two sets of moving objects with nonzero extents,
                 the continuous intersection join query reports every
                 pair of intersecting objects, one from each of the two
                 moving object sets, for every timestamp. This type of
                 query is important for a number of applications,
                 e.g., in the multi-billion dollar computer game
                 industry, massively multiplayer online games like World
                 of Warcraft need to monitor the intersection among
                 players' attack ranges and render players' interaction
                 in real time. The computational cost of a
                 straightforward algorithm or an algorithm adapted from
                 another query type is prohibitive, and answering the
                 query in real time poses a great challenge. Those
                 algorithms compute the query answer for either too long
                 or too short a time interval, which results in either a
                 very large computation cost per answer update or too
                 frequent answer updates, respectively. This observation
                 motivates us to optimize the query processing in the
                 time dimension. In this study, we achieve this
                 optimization by introducing the new concept of
                 time-constrained (TC) processing. Further, TC
                 processing enables a set of effective improvement
                 techniques on traditional intersection join algorithms.
                 Finally, we provide a method to find the optimal value
                 for an important parameter required in our technique,
                 the maximum update interval. As a result, we achieve a
                 highly optimized algorithm for processing continuous
                 intersection join queries on moving objects. With a
                 thorough experimental study, we show that our algorithm
                 outperforms the best adapted existing solution by
                 several orders of magnitude. We also validate the
                 accuracy of our cost model and its effectiveness in
                 optimizing the performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lehner:2012:SSL,
  author =       "Wolfgang Lehner and Michael J. Franklin",
  title =        "Special section on large-scale analytics",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "5",
  pages =        "587--588",
  month =        oct,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0291-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 22 09:44:31 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wolf:2012:OSM,
  author =       "Joel Wolf and Andrey Balmin and Deepak Rajan and
                 Kirsten Hildrum and Rohit Khandekar and Sujay Parekh
                 and Kun-Lung Wu and Rares Vernica",
  title =        "On the optimization of schedules for {MapReduce}
                 workloads in the presence of shared scans",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "5",
  pages =        "589--609",
  month =        oct,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0279-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 22 09:44:31 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We consider MapReduce clusters designed to support
                 multiple concurrent jobs, concentrating on environments
                 in which the number of distinct datasets is modest
                 relative to the number of jobs. In such scenarios, many
                 individual datasets are likely to be scanned
                 concurrently by multiple Map phase jobs. As has been
                 noticed previously, this scenario provides an
                 opportunity for Map phase jobs to cooperate, sharing
                 the scans of these datasets, and thus reducing the
                 costs of such scans. Our paper has three main
                 contributions over previous work. First, we present a
                 novel and highly general method for sharing scans and
                 thus amortizing their costs. This concept, which we
                 call cyclic piggybacking, has a number of advantages
                 over the more traditional batching scheme described in
                 the literature. Second, we notice that the various
                 subjobs generated in this manner can be assumed in an
                 optimal schedule to respect a natural chain precedence
                 ordering. Third, we describe a significant but natural
                 generalization of the recently introduced FLEX
                 scheduler for optimizing schedules within the context
                 of this cyclic piggybacking paradigm, which can be
                 tailored to a variety of cost metrics. Such cost
                 metrics include average response time, average stretch,
                 and any minimax-type metric, for a total of 11 separate
                 and standard metrics. Moreover, most of this carries
                 over in the more general case of overlapping rather
                 than identical datasets as well, employing what we will
                 call semi-shared scans. In such scenarios, chain
                 precedence is replaced by arbitrary precedence, but we
                 can still handle 8 of the original 11 metrics. The
                 overall approach, including both cyclic piggybacking
                 and the FLEX scheduling generalization, is called
                 CIRCUMFLEX. We describe some practical implementation
                 strategies. And we evaluate the performance of
                 CIRCUMFLEX via a variety of simulation and real
                 benchmark experiments.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
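
%%% Cyclic piggybacking can be pictured with a toy model: jobs attach
%%% to one shared, endlessly cycling scan at whatever block it is
%%% currently on and detach after one full cycle, so each block is
%%% read once per cycle regardless of how many jobs share it.  A
%%% Python sketch under that simplification (hypothetical names; the
%%% paper additionally schedules the resulting subjobs with FLEX):
%%%
%%%     def cyclic_scan(blocks, jobs):
%%%         # jobs: list of (attach_position, process_fn) pairs
%%%         n = len(blocks)
%%%         for offset in range(n):      # one full cycle
%%%             for start, process in jobs:
%%%                 process(blocks[(start + offset) % n])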

@Article{Zhou:2012:SPD,
  author =       "Jingren Zhou and Nicolas Bruno and Ming-Chuan Wu and
                 Per-Ake Larson and Ronnie Chaiken and Darren Shakib",
  title =        "{SCOPE}: parallel databases meet {MapReduce}",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "5",
  pages =        "611--636",
  month =        oct,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0280-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 22 09:44:31 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Companies providing cloud-scale data services have
                 increasing needs to store and analyze massive data
                 sets, such as search logs, click streams, and web graph
                 data. For cost and performance reasons, processing is
                 typically done on large clusters of tens of thousands
                 of commodity machines. Such massive data analysis on
                 large clusters presents new opportunities and
                 challenges for developing a highly scalable and
                 efficient distributed computation system that is easy
                 to program and supports complex system optimization to
                 maximize performance and reliability. In this paper, we
                 describe a distributed computation system, Structured
                 Computations Optimized for Parallel Execution (Scope),
                 targeted for this type of massive data analysis. Scope
                 combines benefits from both traditional parallel
                 databases and MapReduce execution engines to allow easy
                 programmability and deliver massive scalability and
                 high performance through advanced optimization. Similar
                 to parallel databases, the system has a SQL-like
                 declarative scripting language with no explicit
                 parallelism, while being amenable to efficient parallel
                 execution on large clusters. An optimizer is
                 responsible for converting scripts into efficient
                 execution plans for the distributed computation engine.
                 A physical execution plan consists of a directed
                 acyclic graph of vertices. Execution of the plan is
                 orchestrated by a job manager that schedules execution
                 on available machines and provides fault tolerance and
                 recovery, much like MapReduce systems. Scope is being
                 used daily for a variety of data analysis and data
                 mining applications over tens of thousands of machines
                 at Microsoft, powering Bing, and other online
                 services.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kang:2012:GEA,
  author =       "U. Kang and Hanghang Tong and Jimeng Sun and
                 Ching-Yung Lin and Christos Faloutsos",
  title =        "{{\tt gbase}}: an efficient analysis platform for
                 large graphs",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "5",
  pages =        "637--650",
  month =        oct,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0283-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 22 09:44:31 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Graphs appear in numerous applications including cyber
                 security, the Internet, social networks, protein
                 networks, recommendation systems, citation networks,
                 and many more. Graphs with millions or even billions of
                 nodes and edges are commonplace. How can such large
                 graphs be stored efficiently? What are the core
                 operations/queries on those graphs? How can graph
                 queries be answered quickly? We propose Gbase, an
                 efficient analysis platform for large graphs. The key
                 novelties lie in (1) our storage and compression scheme
                 for parallel, distributed settings and (2) the
                 carefully chosen graph operations and their efficient
                 implementations. We designed and implemented an
                 instance of Gbase using MapReduce\slash Hadoop. Gbase
                 provides a parallel indexing mechanism for graph
                 operations that both saves storage space and
                 accelerates query responses. We run numerous
                 accelerates query responses. We run numerous
                 experiments on real and synthetic graphs, spanning
                 billions of nodes and edges, and we show that our
                 proposed Gbase is indeed fast, scalable, and nimble,
                 with significant savings in space and time.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tran:2012:CMP,
  author =       "Thanh T. Tran and Liping Peng and Yanlei Diao and
                 Andrew Mcgregor and Anna Liu",
  title =        "{CLARO}: modeling and processing uncertain data
                 streams",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "5",
  pages =        "651--676",
  month =        oct,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0261-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 22 09:44:31 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Uncertain data streams, where data are incomplete and
                 imprecise, have been observed in many environments.
                 Feeding such data streams to existing stream systems
                 produces results of unknown quality, which is of
                 paramount concern to monitoring applications. In this
                 paper, we present the CLARO system, which supports
                 stream processing for uncertain data naturally captured
                 using continuous random variables. CLARO employs a
                 unique data model that is flexible and allows efficient
                 computation. Built on this model, we develop evaluation
                 techniques for relational operators by exploring
                 statistical theory and approximation. We also consider
                 query planning for complex queries given an accuracy
                 requirement. Evaluation results show that our
                 techniques can achieve high performance while
                 satisfying accuracy requirements and outperform
                 state-of-the-art sampling methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Helmer:2012:MSS,
  author =       "Sven Helmer and Nikolaus Augsten and Michael
                 B{\"o}hlen",
  title =        "Measuring structural similarity of semistructured data
                 based on information-theoretic approaches",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "5",
  pages =        "677--702",
  month =        oct,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0263-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 22 09:44:31 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We propose and experimentally evaluate different
                 approaches for measuring the structural similarity of
                 semistructured documents based on information-theoretic
                 concepts. Common to all approaches is a two-step
                 procedure: first, we extract and linearize the
                 structural information from documents, and then, we use
                 similarity measures that are based on, respectively,
                 Kolmogorov complexity and Shannon entropy to determine
                 the distance between the documents. Compared to other
                 approaches, we are able to achieve a linear run-time
                 complexity and demonstrate in an experimental
                 evaluation that the results of our technique in terms
                 of clustering quality are on a par with or even better
                 than those of other, slower approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
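
%%% The Kolmogorov-complexity measure mentioned above is commonly
%%% approximated with a real compressor. A minimal sketch, assuming
%%% the documents' structure has already been extracted and
%%% linearized into strings (the normalized compression distance
%%% shown here is a standard stand-in; the paper's exact measures
%%% may differ):
%%%
%%%     import zlib
%%%
%%%     def C(s: bytes) -> int:
%%%         """Approximate Kolmogorov complexity by compressed size."""
%%%         return len(zlib.compress(s, 9))
%%%
%%%     def ncd(x: bytes, y: bytes) -> float:
%%%         """Normalized compression distance between two strings."""
%%%         cx, cy, cxy = C(x), C(y), C(x + y)
%%%         return (cxy - min(cx, cy)) / max(cx, cy)
%%%
%%%     # Linearized structure, e.g. root-to-leaf tag paths:
%%%     doc1 = b"/db/book/title /db/book/author /db/book/year"
%%%     doc2 = b"/db/book/title /db/book/author /db/book/price"
%%%     print(ncd(doc1, doc2))  # nearer 0 = structurally more similar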

@Article{Cheema:2012:EPS,
  author =       "Muhammad Aamir Cheema and Wenjie Zhang and Xuemin Lin
                 and Ying Zhang",
  title =        "Efficiently processing snapshot and continuous reverse
                 $k$ nearest neighbors queries",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "5",
  pages =        "703--728",
  month =        oct,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0265-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 22 09:44:31 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a set of objects and a query q, a point p is
                 called the reverse k nearest neighbor (R k NN) of q if
                 q is one of the k closest objects of p. In this paper,
                 we introduce the concept of influence zone that is the
                 area such that every point inside this area is the R k
                 NN of q and every point outside this area is not the R
                 k NN. The influence zone has several applications in
                 location-based services, marketing and decision support
                 systems. It can also be used to efficiently process R k
                 NN queries. First, we present efficient algorithm to
                 compute the influence zone. Then, based on the
                 influence zone, we present efficient algorithms to
                 process R k NN queries that significantly outperform
                 existing best-known techniques for both the snapshot
                 and continuous R k NN queries. We also present a
                 detailed theoretical analysis to analyze the area of
                 the influence zone and IO costs of our R k NN
                 processing algorithms. Our experiments demonstrate the
                 accuracy of our theoretical analysis. This paper is an
                 extended version of our previous work (Cheema et al. in
                 Proceedings of ICDE, pp. 577---588, 2011). We make the
                 following new contributions in this extended version:
                 (1) we conduct a rigorous complexity analysis and show
                 that the complexity of one of our proposed algorithms
                 in Cheema et al. (Proceedings of ICDE, pp. 577---588,
                 2011) can be reduced from O ( m$^2$ ) to O ( km ) where
                 m {$>$} k is the number of objects used to compute the
                 influence zone, (2) we show that our techniques can be
                 applied to dimensionality higher than two, and (3) we
                 present efficient techniques to handle data updates.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
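
%%% For reference, the definition in the abstract above can be
%%% checked by brute force as below; the paper's influence zone
%%% answers the same question without scanning all objects. A
%%% minimal sketch (hypothetical names, 2-d points, squared
%%% Euclidean distance):
%%%
%%%     def is_rknn(p, q, objects, k):
%%%         """p is a reverse k-nearest neighbor of q iff fewer than
%%%         k objects lie strictly closer to p than q does."""
%%%         d = lambda a, b: (a[0] - b[0])**2 + (a[1] - b[1])**2
%%%         closer = sum(1 for o in objects
%%%                      if o != p and d(p, o) < d(p, q))
%%%         return closer < k
%%%
%%%     objs = [(0, 0), (1, 1), (4, 4), (5, 5)]
%%%     q = (2, 2)
%%%     print([p for p in objs if is_rknn(p, q, objs, k=1)])
%%%     # [(1, 1)] -- the only object with q among its 1 nearest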

@Article{Zheng:2012:SQP,
  author =       "Kai Zheng and Xiaofang Zhou and Pui Cheong Fung and
                 Kexin Xie",
  title =        "Spatial query processing for fuzzy objects",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "5",
  pages =        "729--751",
  month =        oct,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0266-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 22 09:44:31 MDT 2012",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Range and nearest neighbor queries are the most common
                 types of spatial queries, which have been investigated
                 extensively in recent decades due to their broad range
                 of applications. In this paper, we study this problem
                 in the context of fuzzy objects that have
                 indeterministic boundaries. Fuzzy objects play an
                 important role in many areas, such as biomedical image
                 databases and GIS communities. Existing research on
                 fuzzy objects mainly focuses on modeling basic fuzzy
                 object types and operations, leaving the processing of
                 more advanced queries largely untouched. In this paper,
                 we propose two new kinds of spatial queries for fuzzy
                 objects, namely single threshold query and continuous
                 threshold query, to determine the query results which
                 qualify at a certain probability threshold and within a
                 probability interval, respectively. For efficient
                 single threshold query processing, we optimize the
                 classical R-tree-based search algorithm by deriving
                 more accurate approximations for the distance function
                 between fuzzy objects and the query object. To enhance
                 the performance of continuous threshold queries,
                 effective pruning rules are developed to reduce the
                 search space and speed up the candidate refinement
                 process. The efficiency of our proposed algorithms as
                 well as the optimization techniques is verified with an
                 extensive set of experiments using both synthetic and
                 real datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
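
%%% A minimal sketch of the single threshold query from the abstract
%%% above, assuming each fuzzy object is discretized into weighted
%%% instances (the paper works with continuous fuzzy regions and
%%% R-tree pruning; the names here are hypothetical):
%%%
%%%     def single_threshold_range(query, radius, objects, tau):
%%%         """Return the fuzzy objects whose probability of lying
%%%         within `radius` of `query` is at least tau."""
%%%         qx, qy = query
%%%         hits = []
%%%         for name, instances in objects.items():
%%%             p_in = sum(p for x, y, p in instances
%%%                        if (x - qx)**2 + (y - qy)**2 <= radius**2)
%%%             if p_in >= tau:
%%%                 hits.append(name)
%%%         return hits
%%%
%%%     objs = {"A": [(1, 1, 0.7), (9, 9, 0.3)],
%%%             "B": [(8, 8, 0.9), (1, 2, 0.1)]}
%%%     print(single_threshold_range((0, 0), 3.0, objs, tau=0.5))
%%%     # ['A']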

@Article{Li:2012:MFS,
  author =       "Jianzhong Li and Zhaonian Zou and Hong Gao",
  title =        "Mining frequent subgraphs over uncertain graph
                 databases under probabilistic semantics",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "6",
  pages =        "753--777",
  month =        dec,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0268-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jan 5 08:04:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Frequent subgraph mining has been extensively studied
                 on certain graph data. However, uncertainty is
                 intrinsic to graph data in practice, yet there is very
                 little work on mining uncertain graph data. This paper
                 focuses on mining frequent subgraphs over uncertain
                 graph data under the probabilistic semantics.
                 Specifically, a measure called $\varphi$-frequent
                 probability is introduced to evaluate the degree of
                 recurrence of subgraphs. Given a set of uncertain
                 graphs and two real numbers $0 < \varphi, \tau < 1$,
                 the goal is to quickly find all subgraphs with
                 $\varphi$-frequent probability at least $\tau$. Due to
                 the NP-hardness of the problem and to the \#P-hardness
                 of computing the $\varphi$-frequent probability of a
                 subgraph, an approximate mining algorithm is proposed
                 to produce an $(\varepsilon, \delta)$-approximate set
                 $\Pi$ of ``frequent subgraphs'', where $0 < \varepsilon
                 < \tau$ is error tolerance, and $0 < \delta < 1$ is a
                 confidence bound. The algorithm guarantees that (1) any
                 frequent subgraph $S$ is contained in $\Pi$ with
                 probability at least $((1 - \delta) /2)^s$, where $s$
                 is the number of edges in $S$; (2) any infrequent
                 subgraph with $\varphi$-frequent probability less than
                 $\tau - \varepsilon$ is contained in $\Pi$ with
                 probability at most $\delta /2$. The theoretical
                 analysis shows that to obtain any frequent subgraph
                 with probability at least $1 - \Delta$, the input
                 parameter $\delta$ of the algorithm must be set to at
                 most $1 - 2 (1 - \Delta)^{1 / \ell_{\rm max}}$, where
                 $0 < \Delta < 1$, and $\ell_{\rm max}$ is the maximum
                 number of edges in frequent subgraphs. Extensive
                 experiments on real uncertain graph data verify that
                 the proposed algorithm is practically efficient and has
                 very high approximation quality. Moreover, this paper
                 is the first to discuss the difference between the
                 probabilistic semantics and the expected semantics
                 for mining frequent subgraphs over uncertain graph
                 data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
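
%%% The parameter bound quoted in the abstract above can be computed
%%% directly. A small worked example (note the bound is positive
%%% only when $\Delta > 1 - 2^{-\ell_{\rm max}}$, a consequence of
%%% guarantee (1)):
%%%
%%%     def delta_bound(Delta, l_max):
%%%         """Largest admissible delta so that every frequent
%%%         subgraph with at most l_max edges is reported with
%%%         probability >= 1 - Delta, per the bound
%%%         delta <= 1 - 2 * (1 - Delta)**(1 / l_max)."""
%%%         return 1 - 2 * (1 - Delta) ** (1.0 / l_max)
%%%
%%%     d = delta_bound(Delta=0.99, l_max=5)   # ~0.2038
%%%     # Guarantee (1) then recovers the target probability:
%%%     print(((1 - d) / 2) ** 5)              # 0.01 == 1 - Delta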

@Article{Vergoulis:2012:ARS,
  author =       "Thanasis Vergoulis and Theodore Dalamagas and Dimitris
                 Sacharidis and Timos Sellis",
  title =        "Approximate regional sequence matching for genomic
                 databases",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "6",
  pages =        "779--795",
  month =        dec,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0270-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jan 5 08:04:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Recent advances in computational biology have raised
                 sequence matching requirements that result in new types
                 of sequence database problems. In this work, we
                 introduce an important class of such problems, the
                 approximate regional sequence matching (ARSM) problem.
                 Given a data sequence and a pattern sequence, an ARSM
                 result is an approximate occurrence of a region of
                 the pattern in the data sequence under two
                 conditions. First, the
                 region must contain a predetermined area of the pattern
                 sequence, termed core. Second, the allowable deviation
                 between the region of the pattern and its occurrence in
                 the data sequence depends on the length of the region.
                 We propose the PS-ARSM method that processes
                 holistically the regions of a pattern, taking advantage
                 of their overlaps to efficiently identify the ARSM
                 results. Its performance is evaluated with respect to
                 existing techniques adapted to the ARSM problem.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wu:2012:FES,
  author =       "Dingming Wu and Gao Cong and Christian S. Jensen",
  title =        "A framework for efficient spatial web object
                 retrieval",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "6",
  pages =        "797--822",
  month =        dec,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0271-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jan 5 08:04:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The conventional Internet is acquiring a geospatial
                 dimension. Web documents are being geo-tagged and
                 geo-referenced objects such as points of interest are
                 being associated with descriptive text documents. The
                 resulting fusion of geo-location and documents enables
                 new kinds of queries that take into account both
                 location proximity and text relevancy. This paper
                 proposes a new indexing framework for top-$k$ spatial
                 text retrieval. The framework leverages the inverted
                 file for text retrieval and the R-tree for spatial
                 proximity querying. Several indexing approaches are
                 explored within this framework. The framework
                 encompasses algorithms that utilize the proposed
                 indexes for computing location-aware as well as
                 region-aware top-$k$ text retrieval queries, thus
                 taking into account both text relevancy and spatial
                 proximity to prune the search space. Results of
                 empirical studies with an implementation of the
                 framework demonstrate that the paper's proposal is
                 capable of excellent performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
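
%%% A minimal sketch of the kind of location-aware top-$k$ scoring
%%% the framework above supports, combining text relevancy and
%%% spatial proximity with a weight alpha (a plain scan for
%%% illustration; the paper's point is pruning this search with an
%%% inverted file plus an R-tree):
%%%
%%%     import heapq, math
%%%
%%%     def topk(query_loc, query_terms, objects, k, alpha=0.5):
%%%         qx, qy = query_loc
%%%         def score(obj):
%%%             x, y, terms = obj
%%%             proximity = 1.0 / (1.0 + math.hypot(x - qx, y - qy))
%%%             relevancy = len(query_terms & terms) / len(query_terms)
%%%             return alpha * proximity + (1 - alpha) * relevancy
%%%         return heapq.nlargest(k, objects, key=score)
%%%
%%%     objs = [(1.0, 1.0, {"coffee", "wifi"}),
%%%             (9.0, 9.0, {"coffee", "cake", "wifi"})]
%%%     print(topk((0.0, 0.0), {"coffee", "wifi"}, objs, k=1))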

@Article{Arenas:2012:QLB,
  author =       "Marcelo Arenas and Jorge P{\'e}rez and Juan Reutter
                 and Cristian Riveros",
  title =        "Query language-based inverses of schema mappings:
                 semantics, computation, and closure properties",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "6",
  pages =        "823--842",
  month =        dec,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0272-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jan 5 08:04:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The inversion of schema mappings has been identified
                 as one of the fundamental operators for the development
                 of a general framework for metadata management. During
                 the last few years, three alternative notions of
                 inversion for schema mappings have been proposed
                 (Fagin-inverse (Fagin, TODS 32(4), 25:1--25:53, 2007),
                 quasi-inverse (Fagin et al., TODS 33(2), 11:1--11:52,
                 2008), and maximum recovery (Arenas et al., TODS 34(4),
                 22:1--22:48, 2009)). However, these notions lack some
                 fundamental properties that limit their practical
                 applicability: most of them are expressed in languages
                 including features that are difficult to use in
                 practice, some of these inverses are not guaranteed to
                 exist for mappings specified with source-to-target
                 tuple-generating dependencies (st-tgds), and it has
                 been futile to search for a meaningful mapping language
                 that is closed under any of these notions of inverse.
                 In this paper, we develop a framework for the inversion
                 of schema mappings that fulfills all of the above
                 requirements. It is based on the notion of
                 ${\mathcal{C}}$-maximum recovery, for a query language
                 ${\mathcal{C}}$, a notion designed to generate inverse
                 mappings that recover back only the information that
                 can be retrieved with queries in ${\mathcal{C}}$. By
                 focusing on the language of conjunctive queries (CQ),
                 we are able to find a mapping language that contains
                 the class of st-tgds, is closed under CQ-maximum
                 recovery, and for which the chase procedure can be used
                 to exchange data efficiently. Furthermore, we show that
                 our choices of inverse notion and mapping language are
                 optimal, in the sense that choosing a more expressive
                 inverse operator or mapping language causes the loss of
                 these properties.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bravo:2012:CRX,
  author =       "Loreto Bravo and James Cheney and Irini Fundulaki and
                 Ricardo Segovia",
  title =        "Consistency and repair for {XML} write-access control
                 policies",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "6",
  pages =        "843--867",
  month =        dec,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0273-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jan 5 08:04:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "XML access control policies involving updates may
                 contain security flaws, here called inconsistencies, in
                 which a forbidden operation may be simulated by
                 performing a sequence of allowed operations. This
                 article investigates the problem of deciding whether a
                 policy is consistent, and if not, how its
                 inconsistencies can be repaired. We consider total and
                 partial policies expressed in terms of annotated
                 schemas defining which operations are allowed or denied
                 for the XML trees that are instances of the schema. We
                 show that consistency is decidable in PTIME for such
                 policies and that consistent partial policies can be
                 extended to unique least-privilege consistent total
                 policies. We also consider repair problems based on
                 deleting privileges to restore consistency, show that
                 finding minimal repairs is NP-complete, and give
                 heuristics for finding repairs. Finally, we
                 experimentally evaluate these algorithms in comparison
                 with an exact approach based on answer-set
                 programming.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chang:2012:EDD,
  author =       "Lijun Chang and Jeffrey Xu Yu and Lu Qin and Hong
                 Cheng and Miao Qiao",
  title =        "The exact distance to destination in undirected
                 world",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "6",
  pages =        "869--888",
  month =        dec,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0274-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jan 5 08:04:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Shortest distance queries are essential not only in
                 graph analysis and graph mining tasks but also in
                 database applications, when a large graph needs to be
                 dealt with. Such shortest distance queries are
                 frequently issued by end-users or requested as a
                 subroutine in real applications. For intensive queries
                 on large graphs, it is impractical to compute shortest
                 distances on-line from scratch, and impractical to
                 materialize all-pairs shortest distances. In the
                 literature, 2-hop distance labeling is proposed to
                 index the all-pairs shortest distances. It assigns
                 distance labels to vertices in a large graph in a
                 pre-computing step off-line and then answers shortest
                 distance queries on-line by making use of such distance
                 labels, which avoids exhaustively traversing the large
                 graph when answering queries. However, the existing
                 algorithms to generate 2-hop distance labels are not
                 scalable to large graphs. Finding an optimal 2-hop
                 distance labeling is NP-hard, and heuristic algorithms
                 may generate large distance labels while still
                 needing to pre-compute all-pairs shortest paths. In
                 this paper, we propose a multi-hop distance labeling
                 approach, which generates a subset of the 2-hop
                 distance labels as index off-line. We can compute the
                 multi-hop distance labels efficiently by avoiding
                 pre-computing all-pairs shortest paths. In addition,
                 our multi-hop distance labeling is small enough to be
                 stored. To answer a shortest distance query between two
                 vertices, we first generate the query-specific small
                 set of 2-hop distance labels for the two vertices based
                 on our multi-hop distance labels stored and compute the
                 shortest distance between the two vertices based on the
                 2-hop distance labels generated on-line. We conducted
                 extensive performance studies on large real graphs and
                 confirmed the efficiency of our multi-hop distance
                 labeling scheme.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
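
%%% The on-line half of the scheme in the abstract above is the
%%% standard 2-hop lookup: the distance between two vertices is the
%%% best distance through any hub common to their labels. A minimal
%%% sketch, assuming the labels have already been generated:
%%%
%%%     def query_distance(label_u, label_v):
%%%         """Each label maps hub vertex -> distance to that hub;
%%%         the answer is the minimum, over common hubs, of the sum
%%%         of the two parts."""
%%%         best = float("inf")
%%%         for hub, du in label_u.items():
%%%             dv = label_v.get(hub)
%%%             if dv is not None:
%%%                 best = min(best, du + dv)
%%%         return best
%%%
%%%     label_a = {"a": 0, "h1": 2, "h2": 5}
%%%     label_b = {"b": 0, "h1": 3, "h2": 1}
%%%     print(query_distance(label_a, label_b))   # 5, via hub h1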

@Article{Soh:2012:AEE,
  author =       "Kheng Hong Soh and Ba Quan Truong and Sourav S.
                 Bhowmick",
  title =        "{ANDES}: efficient evaluation of {NOT}-twig queries in
                 relational databases",
  journal =      j-VLDB-J,
  volume =       "21",
  number =       "6",
  pages =        "889--914",
  month =        dec,
  year =         "2012",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0275-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jan 5 08:04:46 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Despite a large body of work on XPath query processing
                 in relational environment, systematic study of queries
                 containing not-predicates have received little
                 attention in the literature. Particularly, several xml
                 supports of industrial-strength commercial rdbms fail
                 to efficiently evaluate such queries. In this paper, we
                 present an efficient and novel strategy to evaluate
                 NOT-twig queries in a tree-unaware relational
                 environment. NOT-twig queries are XPath queries with
                 ancestor --- descendant and parent --- child axis and
                 contain one or more not-predicates. We propose a novel
                 Dewey-based encoding scheme called Andes ( ANcestor
                 Dewey-based Encoding Scheme), which enables us to
                 efficiently filter out elements satisfying a
                 not-predicate by comparing their ancestor group
                 identifiers. In this approach, a set of elements under
                 the same common ancestor at a specific level in the xml
                 tree is assigned same ancestor group identifier. Based
                 on this scheme, we propose a novel sql translation
                 algorithm for NOT-twig query evaluation. Experiments
                 carried out confirm that our proposed approach built on
                 top of an off-the-shelf commercial rdbms significantly
                 outperforms state-of-the-art relational and native
                 approaches. We also explore the query plans selected by
                 a commercial relational optimizer to evaluate our
                 translated queries in different input cardinality. Such
                 exploration further validates the performance benefits
                 of Andes.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
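
%%% A minimal sketch of the ancestor-group idea in the abstract
%%% above: with Dewey labels, every element below the same ancestor
%%% at level l shares the length-l label prefix, so that prefix can
%%% serve as the group identifier (a simplification of the paper's
%%% full encoding):
%%%
%%%     def ancestor_group(dewey: str, level: int) -> str:
%%%         """Group identifier at a given ancestor level: the
%%%         length-`level` prefix of the element's Dewey label."""
%%%         return ".".join(dewey.split(".")[:level])
%%%
%%%     elems = ["1.1.1", "1.1.2", "1.2.1"]
%%%     print({e: ancestor_group(e, 2) for e in elems})
%%%     # {'1.1.1': '1.1', '1.1.2': '1.1', '1.2.1': '1.2'} --
%%%     # elements sharing an identifier sit under the same ancestor,
%%%     # which is what the not-predicate filtering compares.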

@Article{Lehner:2013:SIB,
  author =       "Wolfgang Lehner and Sunita Sarawagi",
  title =        "Special issue on best papers of {VLDB 2011}",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0301-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 24 06:07:36 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tzoumas:2013:EAG,
  author =       "Kostas Tzoumas and Amol Deshpande and Christian S.
                 Jensen",
  title =        "Efficiently adapting graphical models for selectivity
                 estimation",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "1",
  pages =        "3--27",
  month =        feb,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0293-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 24 06:07:36 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Query optimizers rely on statistical models that
                 succinctly describe the underlying data. Models are
                 used to derive cardinality estimates for intermediate
                 relations, which in turn guide the optimizer to choose
                 the best query execution plan. The quality of the
                 resulting plan is highly dependent on the accuracy of
                 the statistical model that represents the data. It is
                 well known that small errors in the model estimates
                 propagate exponentially through joins, and may result
                 in the choice of a highly sub-optimal query execution
                 plan. Most commercial query optimizers make the
                 attribute value independence assumption: all attributes
                 are assumed to be statistically independent. This
                 reduces the statistical model of the data to a
                 collection of one-dimensional synopses (typically in
                 the form of histograms), and it permits the optimizer
                 to estimate the selectivity of a predicate conjunction
                 as the product of the selectivities of the constituent
                 predicates. However, this independence assumption is
                 more often than not wrong, and is considered to be the
                 most common cause of sub-optimal query execution plans
                 chosen by modern query optimizers. We take a step
                 towards a principled and practical approach to
                 performing cardinality estimation without making the
                 independence assumption. By carefully using concepts
                 from the field of graphical models, we are able to
                 factor the joint probability distribution over all the
                 attributes in the database into small, usually
                 two-dimensional distributions, without a significant
                 loss in estimation accuracy. We show how to efficiently
                 construct such a graphical model from the database
                 using only two-way join queries, and we show how to
                 perform selectivity estimation in a highly efficient
                 manner. We integrate our algorithms into the PostgreSQL
                 DBMS. Experimental results indicate that estimation
                 errors can be greatly reduced, leading to orders of
                 magnitude more efficient query execution plans in many
                 cases. Optimization time is kept in the range of tens
                 of milliseconds, making this a practical approach for
                 industrial-strength query optimizers.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
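
%%% A small numeric illustration of the estimation problem described
%%% above (the figures are invented): under the attribute value
%%% independence assumption, a conjunction's selectivity is the
%%% product of per-predicate selectivities, while a chain of small
%%% two-dimensional factors, of the kind a graphical model provides,
%%% preserves the correlations.
%%%
%%%     def independence_estimate(sels):
%%%         """Multiply per-predicate selectivities."""
%%%         out = 1.0
%%%         for s in sels:
%%%             out *= s
%%%         return out
%%%
%%%     def factored_estimate(p_a, p_b_given_a, p_c_given_b):
%%%         """Chain of pairwise factors: P(a) P(b|a) P(c|b)."""
%%%         return p_a * p_b_given_a * p_c_given_b
%%%
%%%     # make='Honda', model='Accord', cylinders=4 are correlated:
%%%     print(independence_estimate([0.10, 0.05, 0.30]))  # 0.0015
%%%     print(factored_estimate(0.10, 0.40, 0.95))        # 0.038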

@Article{Minhas:2013:RTH,
  author =       "Umar Farooq Minhas and Shriram Rajagopalan and Brendan
                 Cully and Ashraf Aboulnaga and Kenneth Salem and Andrew
                 Warfield",
  title =        "{RemusDB}: transparent high availability for database
                 systems",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "1",
  pages =        "29--45",
  month =        feb,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0294-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 24 06:07:36 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we present a technique for building a
                 high-availability (HA) database management system
                 (DBMS). The proposed technique can be applied to any
                 DBMS with little or no customization, and with
                 reasonable performance overhead. Our approach is based
                 on Remus, a commodity HA solution implemented in the
                 virtualization layer, which uses asynchronous virtual
                 machine state replication to provide transparent HA and
                 failover capabilities. We show that while Remus and
                 similar systems can protect a DBMS, database workloads
                 incur a performance overhead of up to 32\% as compared
                 to an unprotected DBMS. We identify the sources of this
                 overhead and develop optimizations that mitigate the
                 problems. We present an experimental evaluation using
                 two popular database systems and industry standard
                 benchmarks showing that for certain workloads, our
                 optimized approach provides fast failover ($\leq 3$s of
                 downtime) with low performance overhead when compared
                 to an unprotected DBMS. Our approach provides a
                 practical means for existing, deployed database systems
                 to be made more reliable with a minimum of risk, cost,
                 and effort. Furthermore, this paper invites new
                 discussion about whether the complexity of HA is best
                 implemented within the DBMS, or as a service by the
                 infrastructure below it.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Furche:2013:OLS,
  author =       "Tim Furche and Georg Gottlob and Giovanni Grasso and
                 Christian Schallhart and Andrew Sellers",
  title =        "{OXPath}: a language for scalable data extraction,
                 automation, and crawling on the deep web",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "1",
  pages =        "47--72",
  month =        feb,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0286-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 24 06:07:36 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The evolution of the web has outpaced itself: A
                 growing wealth of information and increasingly
                 sophisticated interfaces necessitate automated
                 processing, yet existing automation and data extraction
                 technologies have been overwhelmed by this very growth.
                 To address this trend, we identify four key
                 requirements for web data extraction, automation, and
                 (focused) web crawling: (1) interact with sophisticated
                 web application interfaces, (2) precisely capture the
                 relevant data to be extracted, (3) scale with the
                 number of visited pages, and (4) readily embed into
                 existing web technologies. We introduce OXPath as an
                 extension of XPath for interacting with web
                 applications and extracting data thus
                 revealed --- matching all the above
                 requirements. OXPath's page-at-a-time evaluation
                 guarantees memory use independent of the number of
                 visited pages, yet remains polynomial in time. We
                 experimentally validate the theoretical complexity and
                 demonstrate that OXPath's resource consumption is
                 dominated by page rendering in the underlying
                 browser. With an extensive study of sublanguages and
                 properties of OXPath, we pinpoint the effect of
                 specific features on evaluation performance.  Our
                 experiments show that OXPath outperforms existing
                 commercial and academic data extraction tools by a wide
                 margin.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Curino:2013:ADS,
  author =       "Carlo Curino and Hyun Jin Moon and Alin Deutsch and
                 Carlo Zaniolo",
  title =        "Automating the database schema evolution process",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "1",
  pages =        "73--98",
  month =        feb,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0302-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 24 06:07:36 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Supporting database schema evolution represents a
                 long-standing challenge of practical and theoretical
                 importance for modern information systems. In this
                 paper, we describe techniques and systems for
                 automating the critical tasks of migrating the database
                 and rewriting the legacy applications. In addition to
                 labor saving, the benefits delivered by these advances
                 are many and include reliable prediction of outcome,
                 minimization of downtime, system-produced
                 documentation, and support for archiving, historical
                 queries, and provenance. The PRISM/PRISM++ system
                 delivers these benefits, by solving the difficult
                 problem of automating the migration of databases and
                 the rewriting of queries and updates. In this paper, we
                 present the PRISM/PRISM++ system and the novel
                 technology that made it possible. In particular, we
                 focus on the difficult and previously unsolved problem
                 of supporting legacy queries and updates under schema
                 and integrity constraints evolution. The PRISM/PRISM++
                 approach consists in providing the users with a set of
                 SQL-based Schema Modification Operators (SMOs), which
                 describe how the tables in the old schema are modified
                 into those in the new schema. In order to support
                 updates, SMOs are extended with integrity constraints
                 modification operators. By using recent results on
                 schema mapping, the paper (i) characterizes the impact
                 on integrity constraints of structural schema changes,
                 (ii) devises representations that enable the rewriting
                 of updates, and (iii) develops a unified approach for
                 query and update rewriting under constraints. We
                 complement the system with two novel tools: the first
                 automatically collects and provides statistics on
                 schema evolution histories, whereas the second derives
                 equivalent sequences of SMOs from the migration scripts
                 that were used for schema upgrades. These tools were
                 used to produce an extensive testbed containing 15
                 evolution histories of scientific databases and web
                 information systems, providing over 100 years of
                 aggregate evolution histories and almost 2,000 schema
                 evolution steps.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ramesh:2013:KSF,
  author =       "Aditya Ramesh and S. Sudarshan and Purva Joshi and
                 Manisha Naik Gaonkar",
  title =        "Keyword search on form results",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "1",
  pages =        "99--123",
  month =        feb,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0287-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 24 06:07:36 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In recent years there has been a good deal of research
                 in the area of keyword search on structured and
                 semistructured data. Most of this body of work has a
                 significant limitation in the context of enterprise
                 data, since it ignores the application code that has
                 often been carefully designed to present data in a
                 meaningful fashion to users. In this work, we consider
                 how to perform keyword search on enterprise
                 applications, which provide a number of forms that can
                 take parameters; parameters may be explicit, or
                 implicit such as the identifier of the user. In the
                 context of such applications, the goal of keyword
                 search is, given a set of keywords, to retrieve forms
                 along with corresponding parameter values, such that
                 result of each retrieved form executed on the
                 corresponding retrieved parameter values will contain
                 the specified keywords. Some earlier work in this area
                 was based on creating keyword indices on form results,
                 but there are problems in maintaining such indices in
                 the face of updates. In contrast, we propose techniques
                 based on creating inverted SQL queries from the SQL
                 queries in the forms. Unlike earlier work, our
                 techniques do not require any special purpose indices
                 and instead make use of standard text indices supported
                 by most database systems. We have implemented our
                 techniques and show that keyword search can run at
                 reasonable speeds even on large databases with a
                 significant number of forms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Dieng:2013:MFC,
  author =       "Cheikh Tidiane Dieng and Tao-Yuan Jen and Dominique
                 Laurent and Nicolas Spyratos",
  title =        "Mining frequent conjunctive queries using functional
                 and inclusion dependencies",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "2",
  pages =        "125--150",
  month =        apr,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0277-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 29 15:54:45 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We address the issue of mining frequent conjunctive
                 queries in a relational database, a problem known to be
                 intractable even for conjunctive queries over a single
                 table. In this article, we show that mining frequent
                 projection-selection-join queries becomes tractable if
                 joins are performed along keys and foreign keys, in a
                 database satisfying functional and inclusion
                 dependencies, under certain restrictions. We note that
                 these restrictions cover most practical cases,
                 including databases operating over star schemas,
                 snowflake schemas, and constellation schemas. In our
                 approach, we define an equivalence relation over
                 queries using a pre-ordering with respect to which the
                 support is shown to be anti-monotonic. We propose a
                 level-wise algorithm for computing all frequent queries
                 by exploiting the fact that equivalent queries have the
                 same support. We report on experiments showing that, in
                 our context, mining frequent projection-selection-join
                 queries is indeed tractable, even for large data
                 sets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tozun:2013:SDB,
  author =       "Pinar T{\"o}z{\"u}n and Ippokratis Pandis and Ryan
                 Johnson and Anastasia Ailamaki",
  title =        "Scalable and dynamically balanced shared-everything
                 {OLTP} with physiological partitioning",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "2",
  pages =        "151--175",
  month =        apr,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0278-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 29 15:54:45 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Scaling the performance of shared-everything
                 transaction processing systems to highly parallel
                 multicore hardware remains a challenge for database
                 system designers. Recent proposals alleviate locking
                 and logging bottlenecks in the system, leaving page
                 latching as the next potential problem. To tackle the
                 page latching problem, we propose physiological
                 partitioning (PLP). PLP applies logical-only
                 partitioning, maintaining the desired properties of
                 shared-everything designs, and introduces a multi-rooted
                 B+Tree index structure (MRBTree) that enables the
                 partitioning of the accesses at the physical page
                 level. Logical partitioning and MRBTrees together
                 ensure that all accesses to a given index page come
                 from a single thread and, hence, can be entirely latch
                 free; an extended design makes heap page accesses
                 thread private as well. Moreover, MRBTrees offer an
                 infrastructure for easy repartitioning and allow us to
                 have a lightweight dynamic load balancing mechanism
                 (DLB) on top of PLP. Profiling a PLP prototype running
                 on different multicore machines shows that it acquires
                 85 and 68\% fewer contentious critical sections,
                 respectively, than an optimized conventional design and
                 one based on logical-only partitioning. PLP also
                 improves performance by up to almost 50\% over the
                 existing systems, while DLB enhances the system with
                 rapid and robust behavior in both detecting and
                 handling load imbalances.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wu:2013:SXS,
  author =       "Xiaoying Wu and Dimitri Theodoratos",
  title =        "A survey on {XML} streaming evaluation techniques",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "2",
  pages =        "177--202",
  month =        apr,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0281-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 29 15:54:45 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "XML is currently the most popular format for
                 exchanging and representing data on the web. It is used
                 in various applications and for different types of data
                 including structured, semistructured, and unstructured
                 heterogeneous data types. During the period when XML
                 was establishing itself, data streaming applications
                 gained increased attention and importance. Because of
                 these developments, the querying and efficient
                 processing of XML streams became a central issue.
                 In this study, we survey the state of the art in XML
                 streaming evaluation techniques. We focus on both the
                 streaming evaluation of XPath expressions and of XQuery
                 queries. We classify the XPath streaming evaluation
                 approaches according to the main data structure used
                 for the evaluation into three categories:
                 automaton-based approach, array-based approach, and
                 stack-based approach. We review, analyze, and compare
                 the major techniques proposed for each approach. We
                 also review multiple query streaming evaluation
                 techniques. For the XQuery streaming evaluation
                 problem, we identify and discuss four processing
                 paradigms adopted by the existing XQuery stream query
                 engines: the transducer-based paradigm, the
                 algebra-based paradigm, the automata-algebra paradigm,
                 and the pull-based paradigm. In addition, we review
                 optimization techniques for XQuery streaming
                 evaluation. We address the problem of optimizing XQuery
                 streaming evaluation as a buffer optimization problem.
                 For all techniques discussed, we describe the research
                 issues and the proposed algorithms and we compare them
                 with other relevant suggested techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lu:2013:ADU,
  author =       "Wentian Lu and Gerome Miklau and Neil Immerman",
  title =        "Auditing a database under retention policies",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "2",
  pages =        "203--228",
  month =        apr,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0282-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 29 15:54:45 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Auditing the changes to a database is critical for
                 identifying malicious behavior, maintaining data
                 quality, and improving system performance. But an
                 accurate audit log is an historical record of the past
                 that can also pose a serious threat to privacy.
                 Policies that limit data retention conflict with the
                 goal of accurate auditing, and data owners have to
                 carefully balance the need for policy compliance with
                 the goal of accurate auditing. In this paper, we
                 provide a framework for auditing the changes to a
                 database system while respecting data retention
                 policies. Our framework includes an historical data
                 model that supports flexible audit queries, along with
                 a language for retention policies that can hide
                 individual attribute values or remove entire tuples
                 from the history. Under retention policies, the audit
                 history is partially incomplete. Thus, audit queries on
                 the protected history can include imprecise results. We
                 propose two different models (a tuple-independent model
                 and a tuple-correlated model) for formalizing the
                 meaning of audit queries. We implement policy
                 application and query answering efficiently in a
                 standard relational system and characterize the cases
                 where accurate auditing can be achieved under retention
                 restrictions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yuan:2013:LLB,
  author =       "Dayu Yuan and Prasenjit Mitra",
  title =        "{Lindex}: a lattice-based index for graph databases",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "2",
  pages =        "229--252",
  month =        apr,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0284-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 29 15:54:45 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Subgraph querying has wide applications in various
                 fields such as cheminformatics and bioinformatics.
                 Given a query graph, q, a subgraph-querying algorithm
                 retrieves all graphs, D ( q ), which have q as a
                 subgraph, from a graph database, D. Subgraph querying
                 is costly because it uses subgraph isomorphism tests,
                 which are NP-complete. Graph indices are commonly used
                 to improve the performance of subgraph querying in
                 graph databases. Subgraph-querying algorithms first
                 construct a candidate answer set by filtering out a set
                 of false answers and then verify each candidate graph
                 using subgraph isomorphism tests. To build graph
                 indices, various kinds of substructure (subgraph,
                 subtree, or path) features have been proposed with the
                 goal of maximizing the filtering rate. Each of them
                 works with a specifically designed index structure, for
                 example, discriminative and frequent subgraph features
                 work with gIndex, $\delta$-TCFG features work with FG-index,
                 etc. We propose Lindex, a graph index, which indexes
                 subgraphs contained in database graphs. Nodes in Lindex
                 represent key-value pairs where the key is a subgraph
                 in a database and the value is a list of database
                 graphs containing the key. We propose two heuristics
                 that are used in the construction of Lindex and that
                 allow us to answer subgraph queries while conducting
                 fewer subgraph isomorphism tests. Consequently, Lindex
                 improves subgraph-querying efficiency. In addition,
                 Lindex is compatible with any choice of features.
                 Empirically, we demonstrate that Lindex used in
                 conjunction with subgraph indexing features proposed in
                 previous works outperforms other specifically designed
                 index structures. As a novel index structure, Lindex
                 (1) is effective in filtering false graphs, (2) provides
                 fast index lookups, (3) is fast with respect to index
                 construction and maintenance, and (4) can be
                 constructed using any set of substructure index
                 features. These four properties result in a fast and
                 scalable subgraph-querying infrastructure. We
                 substantiate the benefits of Lindex and its
                 disk-resident variation Lindex+ theoretically and
                 empirically.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
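
%%% The filtering step shared by Lindex and the other feature-based
%%% indexes discussed above can be sketched as a posting-list
%%% intersection; only the surviving candidates need the expensive
%%% subgraph isomorphism verification. The feature names below are
%%% hypothetical:
%%%
%%%     def candidate_set(query_features, index, all_graphs):
%%%         """Every answer graph must contain each indexed feature
%%%         of the query, so intersect the features' posting lists;
%%%         unindexed features simply cannot prune."""
%%%         cands = set(all_graphs)
%%%         for f in query_features:
%%%             if f in index:
%%%                 cands &= index[f]
%%%         return cands
%%%
%%%     index = {"C-C": {1, 2, 3}, "C=O": {2, 3}, "ring6": {3}}
%%%     print(candidate_set({"C-C", "C=O"}, index, {1, 2, 3, 4}))
%%%     # {2, 3} -- only these go on to isomorphism testing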

@Article{Song:2013:CDH,
  author =       "Shaoxu Song and Lei Chen and Philip S. Yu",
  title =        "Comparable dependencies over heterogeneous data",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "2",
  pages =        "253--274",
  month =        apr,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0285-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 29 15:54:45 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "To study the data dependencies over heterogeneous data
                 in dataspaces, we define a general dependency form,
                  namely comparable dependencies (CDs), which specifies
                 constraints on comparable attributes. It covers the
                 semantics of a broad class of dependencies in
                  databases, including functional dependencies (FDs),
                  metric functional dependencies (MFDs), and matching
                  dependencies (MDs). As we illustrated, comparable
                 dependencies are useful in real practice of dataspaces,
                 such as semantic query optimization. Due to
                 heterogeneous data in dataspaces, the first question,
                 known as the validation problem, is to tell whether a
                 dependency (almost) holds in a data instance.
                 Unfortunately, as we proved, the validation problem
                 with certain error or confidence guarantee is generally
                 hard. In fact, the confidence validation problem is
                 also NP-hard to approximate to within any constant
                 factor. Nevertheless, we develop several approaches for
                 efficient approximation computation, such as greedy and
                 randomized approaches with an approximation bound on
                 the maximum number of violations that an object may
                 introduce. Finally, through an extensive experimental
                 evaluation on real data, we verify the superiority of
                 our methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Qiao:2013:CWC,
  author =       "Miao Qiao and Hong Cheng and Lu Qin and Jeffrey Xu Yu
                 and Philip S. Yu and Lijun Chang",
  title =        "Computing weight constraint reachability in large
                 networks",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "3",
  pages =        "275--294",
  month =        jun,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0288-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:10 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Reachability is a fundamental problem on large-scale
                 networks emerging nowadays in various application
                 domains, such as social networks, communication
                 networks, biological networks, road networks, etc. It
                 has been studied extensively. However, little existing
                 work has studied reachability with realistic
                 constraints imposed on graphs with real-valued edge or
                 node weights. In fact, such weights are very common in
                 many real-world networks, for example, the bandwidth of
                 a link in communication networks, the reliability of an
                 interaction between two proteins in PPI networks, and
                 the handling capacity of a warehouse/storage point in a
                 distribution network. In this paper, we formalize a new
                 yet important reachability query in weighted undirected
                 graphs, called weight constraint reachability (WCR)
                  query that asks: is there a path between nodes $a$
                  and $b$ on which each real-valued edge (or node)
                  weight satisfies a range constraint? We discover an
                 interesting property of WCR, based on which, we design
                 a novel edge-based index structure to answer the WCR
                  query in $O(1)$ time. Furthermore, we consider the case
                 when the index cannot entirely fit in the memory, which
                 can be very common for emerging massive networks. An
                 I/O-efficient index is proposed, which provides
                 constant I/O (precisely four I/Os) query time with
                  $O(|V| \log |V|)$ disk-based index size. Extensive
                 experimental studies on both real and synthetic
                 datasets demonstrate the efficiency and scalability of
                 our solutions in answering the WCR query.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
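
%%% The WCR query above has a simple baseline semantics: discard every
%%% edge whose weight violates the range constraint and test plain
%%% reachability. The paper's contribution is an index answering this
%%% in $O(1)$ time; the sketch below is only the naive linear-time
%%% check it replaces (edge-weight variant, adjacency lists assumed):
%%%
%%%     from collections import deque
%%%
%%%     def wcr(adj, a, b, lo, hi):
%%%         # adj: {node: [(neighbor, weight), ...]}, undirected graph
%%%         seen, frontier = {a}, deque([a])
%%%         while frontier:
%%%             u = frontier.popleft()
%%%             if u == b:
%%%                 return True
%%%             for v, w in adj[u]:
%%%                 if lo <= w <= hi and v not in seen:
%%%                     seen.add(v)
%%%                     frontier.append(v)
%%%         return False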

@Article{Toyoda:2013:PDD,
  author =       "Machiko Toyoda and Yasushi Sakurai and Yoshiharu
                 Ishikawa",
  title =        "Pattern discovery in data streams under the time
                 warping distance",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "3",
  pages =        "295--318",
  month =        jun,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0289-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:10 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Subsequence matching is a basic problem in the field
                 of data stream mining. In recent years, there has been
                 significant research effort spent on efficiently
                 finding subsequences similar to a query sequence.
                 Another challenging issue in relation to subsequence
                 matching is how we identify common local patterns when
                 both sequences are evolving. This problem arises in
                 trend detection, clustering, and outlier detection.
                 Dynamic time warping (DTW) is often used for
                 subsequence matching and is a powerful similarity
                 measure. However, the straightforward method using DTW
                 incurs a high computation cost for this problem. In
                 this paper, we propose a one-pass algorithm,
                 CrossMatch, that achieves the above goal. CrossMatch
                 addresses two important challenges: (1) how can we
                 identify common local patterns efficiently without any
                 omission? (2) how can we find common local patterns in
                 data stream processing? To tackle these challenges,
                 CrossMatch incorporates three ideas: (1) a scoring
                 function, which computes the DTW distance indirectly to
                 reduce the computation cost, (2) a position matrix,
                 which stores starting positions to keep track of common
                 local patterns in a streaming fashion, and (3) a
                 streaming algorithm, which identifies common local
                 patterns efficiently and outputs them on the fly. We
                 provide a theoretical analysis and prove that our
                 algorithm does not sacrifice accuracy. Our experimental
                 evaluation and case studies show that CrossMatch can
                 incrementally discover common local patterns in data
                 streams within constant time (per update) and space.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
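
%%% For reference, the dynamic time warping distance that CrossMatch
%%% computes indirectly is the textbook quadratic dynamic program
%%% sketched below; the paper's point is that re-running it on every
%%% stream update is too costly, hence the incremental scoring
%%% function.
%%%
%%%     import math
%%%
%%%     def dtw(x, y):
%%%         # O(len(x) * len(y)) time and space; 1-D sequences
%%%         n, m = len(x), len(y)
%%%         d = [[math.inf] * (m + 1) for _ in range(n + 1)]
%%%         d[0][0] = 0.0
%%%         for i in range(1, n + 1):
%%%             for j in range(1, m + 1):
%%%                 cost = abs(x[i - 1] - y[j - 1])
%%%                 d[i][j] = cost + min(d[i - 1][j],
%%%                                      d[i][j - 1],
%%%                                      d[i - 1][j - 1])
%%%         return d[n][m]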

@Article{Xie:2013:UDV,
  author =       "Xike Xie and Reynold Cheng and Man Lung Yiu and Liwen
                 Sun and Jinchuan Chen",
  title =        "{UV-diagram}: a {Voronoi} diagram for uncertain
                 spatial databases",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "3",
  pages =        "319--344",
  month =        jun,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0290-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:10 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The Voronoi diagram is an important technique for
                 answering nearest-neighbor queries for spatial
                 databases. We study how the Voronoi diagram can be used
                 for uncertain spatial data, which are inherent in
                 scientific and business applications. Specifically, we
                  propose the Uncertain-Voronoi diagram (or UV-diagram),
                 which divides the data space into disjoint
                  ``UV-partitions''. Each UV-partition $P$ is
                  associated with a set $S$ of objects, such that any
                  point $q$ located in $P$ has the set $S$ as its
                  nearest neighbor with nonzero
                 probabilities. The UV-diagram enables queries that
                 return objects with nonzero chances of being the
                  nearest neighbor (NN) of a given point $q$. It
                  supports ``continuous nearest-neighbor search'',
                  which refreshes the set of NN objects of $q$, as the
                  position of $q$
                 changes. It also allows the analysis of
                 nearest-neighbor information, for example, to find out
                 the number of objects that are the nearest neighbors of
                 any point in a given area. A UV-diagram requires
                 exponential construction and storage costs. To tackle
                 these problems, we devise an alternative representation
                 of a UV-diagram, by using a set of UV-cells. A UV-cell
                  of an object $o$ is the extent $e$ for which $o$ can
                  be the nearest neighbor of any point $q \in e$. We
                  study how to
                 speed up the derivation of UV-cells by considering its
                 nearby objects. We also use the UV-cells to design the
                 UV-index, which supports different queries, and can be
                 constructed in polynomial time. We have performed
                 extensive experiments on both real and synthetic data
                 to validate the efficiency of our approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
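
%%% The UV-diagram precomputes, for every region of space, the set of
%%% objects with nonzero probability of being the nearest neighbor of
%%% a point in that region. As a point of reference only, that answer
%%% set can be estimated naively by Monte Carlo, assuming (a
%%% hypothetical representation) that each uncertain object is given
%%% as a bag of sampled locations:
%%%
%%%     import math
%%%     import random
%%%
%%%     def possible_nn(q, objects, trials=10000):
%%%         # objects: {oid: [(x, y), ...]} samples per uncertain object
%%%         hits = set()
%%%         for _ in range(trials):
%%%             world = {oid: random.choice(pts)
%%%                      for oid, pts in objects.items()}
%%%             hits.add(min(world,
%%%                          key=lambda oid: math.dist(q, world[oid])))
%%%         return hits  # estimated nonzero-probability NNs of q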

@Article{Zhu:2013:HEQ,
  author =       "Yuanyuan Zhu and Lu Qin and Jeffrey Xu Yu and Yiping
                 Ke and Xuemin Lin",
  title =        "High efficiency and quality: large graphs matching",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "3",
  pages =        "345--368",
  month =        jun,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0292-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:10 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Graph matching plays an essential role in many real
                 applications. In this paper, we study how to match two
                 large graphs by maximizing the number of matched edges,
                 which is known as maximum common subgraph matching and
                  is NP-hard. Algorithms for exact matching cannot
                  handle graphs with more than 30 nodes, while for
                  approximate matching, the quality can be very poor.
                  We propose a
                 novel two-step approach that can efficiently match two
                 large graphs over thousands of nodes with high matching
                 quality. In the first step, we propose an
                 anchor-selection/expansion approach to compute a good
                 initial matching. In the second step, we propose a new
                 approach to refine the initial matching. We give the
                 optimality of our refinement and discuss how to
                 randomly refine the matching with different
                 combinations. We further show how to extend our
                 solution to handle labeled graphs. We conducted
                 extensive testing using real and synthetic datasets and
                 report our findings in this paper.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
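
%%% The two-step scheme above (anchor selection, then expansion that
%%% maximizes the number of matched edges) can be illustrated by a
%%% greedy expansion loop. This is only a sketch of the general idea
%%% under an assumed adjacency-set representation, not the paper's
%%% refinement algorithm:
%%%
%%%     def expand(adj1, adj2, anchor):
%%%         # adj1, adj2: {node: set(neighbors)}; anchor: (u0, v0)
%%%         match = dict([anchor])
%%%         used = {anchor[1]}
%%%         while True:
%%%             best, gain = None, 0
%%%             for u in list(match):
%%%                 for u2 in adj1[u] - match.keys():
%%%                     for v2 in adj2[match[u]] - used:
%%%                         # edges newly matched if (u2, v2) is added
%%%                         g = sum(1 for w in adj1[u2]
%%%                                 if w in match and match[w] in adj2[v2])
%%%                         if g > gain:
%%%                             best, gain = (u2, v2), g
%%%             if best is None:
%%%                 return match
%%%             match[best[0]] = best[1]
%%%             used.add(best[1])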

@Article{Baca:2013:OEG,
  author =       "Radim Baca and Michal Kr{\'a}tk{\'y} and Tok Wang Ling
                 and Jiaheng Lu",
  title =        "Optimal and efficient generalized twig pattern
                 processing: a combination of preorder and postorder
                 filterings",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "3",
  pages =        "369--393",
  month =        jun,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0295-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:10 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Searching for occurrences of a twig pattern query
                 (TPQ) in an XML document is a core task of all XML
                 database query languages. The generalized twig pattern
                 (GTP) extends the TPQ model to include semantics
                 related to output nodes, optional nodes, and boolean
                 expressions which are part of the XQuery language.
                 Preorder filtering holistic algorithms such as
                 TwigStack represent a significant class of TPQ
                 processing approaches with a linear worst-case I/O
                 complexity with respect to the sum of the input and
                 output sizes for some query classes. Another important
                 class of holistic approaches is represented by
                  postorder filtering holistic algorithms such as
                  Twig$^2$Stack, which introduced a linear output
                 enumeration time with respect to the result size. In
                 this article, we introduce a holistic algorithm called
                 GTPStack which is the first approach capable of
                 processing a GTP with a linear worst-case I/O
                 complexity with respect to the GTP result size. This is
                 achieved by using a combination of the preorder and
                 postorder filterings before storing nodes in an
                 intermediate storage. Additionally, another
                 contribution of this article is an introduction of a
                 new perspective of holistic algorithm optimality. We
                 show that the optimality depends not only on a query
                 class but also on XML document characteristics. This
                 new view on the optimality extends the general
                 knowledge about the type of queries for which the
                 holistic algorithms are optimal. Moreover, it allows us
                 to determine that GTPStack is optimal for any GTP when
                 a specific XML document is considered. We present a
                 comprehensive experimental study of the
                 state-of-the-art holistic algorithms showing under
                 which conditions GTPStack outperforms the other
                 holistic approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Silva:2013:SQT,
  author =       "Yasin N. Silva and Walid G. Aref and Per-Ake Larson
                 and Spencer S. Pearson and Mohamed H. Ali",
  title =        "Similarity queries: their conceptual evaluation,
                 transformations, and processing",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "3",
  pages =        "395--420",
  month =        jun,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0296-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:10 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Many application scenarios can significantly benefit
                 from the identification and processing of similarities
                 in the data. Even though some work has been done to
                 extend the semantics of some operators, for example
                 join and selection, to be aware of data similarities,
                 there has not been much study on the role and
                 implementation of similarity-aware operations as
                 first-class database operators. Furthermore, very
                 little work has addressed the problem of evaluating and
                 optimizing queries that combine several similarity
                 operations. The focus of this paper is the study of
                 similarity queries that contain one or multiple
                 first-class similarity database operators such as
                 Similarity Selection, Similarity Join, and Similarity
                 Group-by. Particularly, we analyze the implementation
                 techniques of several similarity operators, introduce a
                 consistent and comprehensive conceptual evaluation
                 model for similarity queries, and present a rich set of
                 transformation rules to extend cost-based query
                 optimization to the case of similarity queries.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Dindar:2013:MES,
  author =       "Nihal Dindar and Nesime Tatbul and Ren{\'e}e J. Miller
                 and Laura M. Haas and Irina Botan",
  title =        "Modeling the execution semantics of stream processing
                 engines with {SECRET}",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "4",
  pages =        "421--446",
  month =        aug,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0297-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:16 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "There are many academic and commercial stream
                 processing engines (SPEs) today, each of them with its
                 own execution semantics. This variation may lead to
                 seemingly inexplicable differences in query results. In
                 this paper, we present SECRET, a model of the behavior
                 of SPEs. SECRET is a descriptive model that allows
                 users to analyze the behavior of systems and understand
                 the results of window-based queries (with time- and
                 tuple-based windows) for a broad range of heterogeneous
                 SPEs. The model is the result of extensive analysis and
                 experimentation with several commercial and academic
                 engines. In the paper, we describe the types of
                 heterogeneity found in existing engines and show with
                 experiments on real systems that our model can explain
                 the key differences in windowing behavior.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
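
%%% Much of the engine-to-engine divergence SECRET describes comes
%%% down to window semantics. Below is a minimal sketch of the two
%%% window types named in the abstract under one fixed set of choices
%%% (tumbling windows, time windows aligned to the first tuple's
%%% timestamp); real SPEs differ precisely in such choices:
%%%
%%%     def tuple_windows(stream, size):
%%%         # count-based tumbling window: emit every `size` tuples
%%%         win = []
%%%         for t in stream:
%%%             win.append(t)
%%%             if len(win) == size:
%%%                 yield win
%%%                 win = []
%%%
%%%     def time_windows(stream, width):
%%%         # time-based tumbling window over (timestamp, value) pairs
%%%         win, end = [], None
%%%         for ts, v in stream:
%%%             if end is None:
%%%                 end = ts + width
%%%             while ts >= end:  # close expired (possibly empty) windows
%%%                 yield win
%%%                 win, end = [], end + width
%%%             win.append((ts, v))
%%%         if win:
%%%             yield win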

@Article{Elghandour:2013:RXP,
  author =       "Iman Elghandour and Ashraf Aboulnaga and Daniel C.
                 Zilio and Calisto Zuzarte",
  title =        "Recommending {XML} physical designs for {XML}
                 databases",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "4",
  pages =        "447--470",
  month =        aug,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0298-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:16 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Database systems employ physical structures such as
                 indexes and materialized views to improve query
                 performance, potentially by orders of magnitude. It is
                 therefore important for a database administrator to
                 choose the appropriate configuration of these physical
                 structures for a given database. XML database systems
                 are increasingly being used to manage semi-structured
                 data, and XML support has been added to commercial
                 database systems. In this paper, we address the problem
                 of automatic physical design for XML databases, which
                 is the process of automatically selecting the best set
                 of physical structures for a database and a query
                 workload. We focus on recommending two types of
                 physical structures: XML indexes and relational
                 materialized views of XML data. We present a design
                 advisor for recommending XML indexes, one for
                 recommending materialized views, and an integrated
                 design advisor that recommends both indexes and
                 materialized views. A key characteristic of our
                 advisors is that they are tightly coupled with the
                 query optimizer of the database system, and they rely
                 on the optimizer for enumerating and evaluating
                 physical designs. We have implemented our advisors in a
                 prototype version of IBM DB2 V9, and we experimentally
                 demonstrate the effectiveness of their recommendations
                 using this implementation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mazuran:2013:EPD,
  author =       "Mirjana Mazuran and Edoardo Serra and Carlo Zaniolo",
  title =        "Extending the power of datalog recursion",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "4",
  pages =        "471--493",
  month =        aug,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0299-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:16 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Supporting aggregates in recursive logic rules
                 represents a very important problem for Datalog. To
                 solve this problem, we propose a simple extension,
                 called DatalogFS (Datalog extended with frequency
                 support goals), that supports queries and reasoning
                 about the number of distinct variable assignments
                 satisfying given goals, or conjunctions of goals, in
                 rules. This monotonic extension greatly enhances the
                 power of Datalog, while preserving (i) its declarative
                 semantics and (ii) its amenability to efficient
                 implementation via differential fixpoint and other
                 optimization techniques presented in the paper. Thus,
                 DatalogFS enables the efficient formulation of queries
                 that could not be expressed efficiently or could not be
                 expressed at all in Datalog with stratified negation
                 and aggregates. In fact, using a generalized notion of
                 multiplicity called frequency, we show that diffusion
                 models and page rank computations can be easily
                  expressed and efficiently implemented using DatalogFS.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
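
%%% The abstract mentions differential fixpoint evaluation as one of
%%% the optimizations DatalogFS remains amenable to. On plain Datalog,
%%% that technique (also known as semi-naive evaluation) joins only
%%% the newest facts in each round; a sketch on transitive closure,
%%% without the frequency-support extension:
%%%
%%%     def transitive_closure(edges):
%%%         # edges: set of (a, b) pairs
%%%         total = set(edges)
%%%         delta = set(edges)
%%%         while delta:
%%%             derived = {(a, c)
%%%                        for (a, b) in delta
%%%                        for (b2, c) in edges
%%%                        if b == b2}
%%%             delta = derived - total
%%%             total |= delta
%%%         return total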

@Article{Galpin:2013:QAO,
  author =       "Ixent Galpin and Alvaro A. Fernandes and Norman W.
                 Paton",
  title =        "{QoS}-aware optimization of sensor network queries",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "4",
  pages =        "495--517",
  month =        aug,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0300-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:16 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The resource-constrained nature of mote-level wireless
                 sensor networks (WSNs) poses challenges for the design
                  of general-purpose sensor network query processors
                 (SNQPs). Existing SNQPs tend to generate query
                 execution plans (QEPs) that are selected on the basis
                 of a fixed, implicit expectation, for example, that
                 energy consumption should be kept as small as possible.
                 However, in WSN applications, the same query may be
                 subject to several, possibly conflicting,
                 quality-of-service (QoS) expectations concomitantly
                 (for example maximizing data acquisition rates subject
                 to keeping energy consumption low). It is also not
                 uncommon for the QoS expectations to change over the
                 lifetime of a deployment (for example from low to high
                 data acquisition rates). This paper describes
                 optimization algorithms that respond to stated QoS
                 expectations (about acquisition rate, delivery time,
                 energy consumption and lifetime) when making routing,
                 placement, and timing decisions for in-WSN query
                 processing. The paper shows experimentally that
                 QoS-awareness offers significant benefits in responding
                 to, and reconciling, diverse QoS expectations, thereby
                 enabling QoS-aware SNQPs to generate efficient QEPs for
                  a broader range of WSN applications than has hitherto been
                 possible.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Deutch:2013:TQW,
  author =       "Daniel Deutch and Tova Milo and Neoklis Polyzotis",
  title =        "Top-$k$ queries over {Web} applications",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "4",
  pages =        "519--542",
  month =        aug,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0303-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:16 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The core logic of web applications that offer a
                  particular service, such as online shopping or
                  e-commerce, is typically captured by Business Processes
                 (BPs). Among all the (maybe infinitely many) possible
                 execution flows of a BP, analysts are often interested
                 in identifying flows that are ``most important'',
                 according to some weight metric. The goal of the
                 present paper is to provide efficient algorithms for
                 top-$k$ query evaluation over the possible executions
                 of Business Processes, under some given weight
                 function. Unique difficulties in top-$k$ analysis in
                  this setting stem from (1) the fact that the number of
                 possible execution flows of a given BP is typically
                  very large, or even infinite in the presence of recursion,
                 and (2) that the weights (e.g., likelihood, monetary
                 cost, etc.) induced by actions performed during the
                 execution (e.g., product purchase) may be
                 inter-dependent (due to probabilistic dependencies,
                 combined discount deals etc.). We exemplify these
                 difficulties, and overcome them to provide efficient
                 algorithms for query evaluation where possible. We also
                  describe in detail an application prototype that we
                 have developed for recommending optimal navigation in
                 an online shopping web site that is based on our model
                 and algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gao:2013:OSD,
  author =       "Jun Gao and Jeffrey Xu Yu and Ruoming Jin and Jiashuai
                 Zhou and Tengjiao Wang and Dongqing Yang",
  title =        "Outsourcing shortest distance computing with privacy
                 protection",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "4",
  pages =        "543--559",
  month =        aug,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0304-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:16 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the advent of cloud computing, it becomes
                 desirable to outsource graphs into cloud servers to
                 efficiently perform complex operations without
                 compromising their sensitive information. In this
                 paper, we take the shortest distance computation as a
                  case to investigate the technical issues in outsourcing
                 graph operations. We first propose a parameter-free,
                  edge-based 2-HOP delegation security model (shortened as
                 2-HOP delegation model), which can greatly reduce the
                 chances of the structural pattern attack and the graph
                 reconstruction attack. We then transform the original
                 graph into a link graph $ G_l $ kept locally and a set
                 of outsourced graphs $ \mathcal G_o $. Our objectives
                  include (i) ensuring that each outsourced graph meets the
                 requirement of 2-HOP delegation model, (ii) making
                  shortest distance queries answerable using $ G_l $ and
                 $ \mathcal G_o $, (iii) minimizing the space cost of $
                 G_l $. We devise a greedy method to produce $ G_l $ and
                 $ \mathcal G_o $, which can exactly answer shortest
                 distance queries. We also develop an efficient
                 transformation method to support approximate shortest
                 distance answering under a given average additive error
                 bound. The experimental results illustrate the
                 effectiveness and efficiency of our method.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kalashnikov:2013:SEF,
  author =       "Dmitri V. Kalashnikov",
  title =        "{Super-EGO}: fast multi-dimensional similarity join",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "4",
  pages =        "561--585",
  month =        aug,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-012-0305-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 17 17:37:16 MDT 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Efficient processing of high-dimensional similarity
                 joins plays an important role for a wide variety of
                 data-driven applications. In this paper, we consider
                  the $\varepsilon$-join variant of the problem. Given
                  two $d$-dimensional datasets and a parameter
                  $\varepsilon$, the task is to find all pairs of
                  points, one from each dataset, that are within
                  $\varepsilon$ distance from each other. We propose a
                  new $\varepsilon$-join algorithm, called Super-EGO,
                  which belongs to the EGO family of join algorithms.
                  The new algorithm gains its advantage by using a
                  novel data-driven dimensionality re-ordering
                 technique, developing a new EGO-strategy that more
                 aggressively avoids unnecessary computation, as well as
                 by developing a parallel version of the algorithm. We
                 study the newly proposed Super-EGO algorithm on large
                 real and synthetic datasets. The empirical study
                  demonstrates a significant advantage of the proposed
                  solution over existing state-of-the-art
                  techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
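
%%% The $\varepsilon$-join definition in the abstract admits a
%%% standard grid baseline: hash points into cells of side
%%% $\varepsilon$, so join partners can only sit in neighboring cells.
%%% Super-EGO's dimensionality re-ordering and EGO-strategy prune far
%%% more aggressively; the sketch below is just this common baseline:
%%%
%%%     import itertools
%%%     import math
%%%     from collections import defaultdict
%%%
%%%     def eps_join(A, B, eps):
%%%         # A, B: lists of equal-dimension tuples of floats
%%%         cell = lambda p: tuple(int(math.floor(c / eps)) for c in p)
%%%         grid = defaultdict(list)
%%%         for j, q in enumerate(B):
%%%             grid[cell(q)].append(j)
%%%         out = []
%%%         for i, p in enumerate(A):
%%%             base = cell(p)
%%%             # a point within eps must fall in an adjacent cell
%%%             for off in itertools.product((-1, 0, 1), repeat=len(p)):
%%%                 key = tuple(c + o for c, o in zip(base, off))
%%%                 for j in grid.get(key, []):
%%%                     if math.dist(p, B[j]) <= eps:
%%%                         out.append((i, j))
%%%         return out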

@Article{Brambilla:2013:SIS,
  author =       "Marco Brambilla and Stefano Ceri and Alon Halevy",
  title =        "Special issue on structured and crowd-sourced data on
                 the {Web}",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "5",
  pages =        "587--588",
  month =        oct,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0327-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Dec 16 16:57:30 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Goasdoue:2013:GTT,
  author =       "Fran{\c{c}}ois Goasdou{\'e} and Konstantinos Karanasos
                 and Yannis Katsis and Julien Leblay and Ioana Manolescu
                 and Stamatis Zampetakis",
  title =        "Growing triples on trees: an {XML--RDF} hybrid model
                 for annotated documents",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "5",
  pages =        "589--613",
  month =        oct,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0321-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Dec 16 16:57:30 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Since the beginning of the Semantic Web initiative,
                 significant efforts have been invested in finding
                 efficient ways to publish, store, and query metadata on
                 the Web. RDF and SPARQL have become the standard data
                 model and query language, respectively, to describe
                 resources on the Web. Large amounts of RDF data are now
                 available either as stand-alone datasets or as metadata
                 over semi-structured (typically XML) documents. The
                 ability to apply RDF annotations over XML data
                 emphasizes the need to represent and query data and
                 metadata simultaneously. We propose XR, a novel hybrid
                 data model capturing the structural aspects of XML data
                 and the semantics of RDF, also enabling us to reason
                 about XML data. Our model is general enough to describe
                 pure XML or RDF datasets, as well as RDF-annotated XML
                 data, where any XML node can act as a resource. This
                 data model comes with the XRQ query language that
                 combines features of both XQuery and SPARQL. To
                 demonstrate the feasibility of this hybrid XML-RDF data
                 management setting, and to validate its interest, we
                 have developed an XR platform on top of well-known data
                 management systems for XML and RDF. In particular, the
                 platform features several XRQ query processing
                 algorithms, whose performance is experimentally
                 compared.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Furche:2013:OKA,
  author =       "Tim Furche and Georg Gottlob and Giovanni Grasso and
                 Xiaonan Guo and Giorgio Orsi and Christian Schallhart",
  title =        "The ontological key: automatically understanding and
                 integrating forms to access the deep {Web}",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "5",
  pages =        "615--640",
  month =        oct,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0323-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Dec 16 16:57:30 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Forms are our gates to the Web. They enable us to
                 access the deep content of Web sites. Automatic form
                 understanding provides applications, ranging from
                 crawlers over meta-search engines to service
                 integrators, with a key to this content. Yet, it has
                 received little attention other than as component in
                 specific applications such as crawlers or meta-search
                 engines. No comprehensive approach to form
                 understanding exists, let alone one that produces rich
                 models for semantic services or integration with linked
                 open data. In this paper, we present opal, the first
                 comprehensive approach to form understanding and
                 integration. We identify form labeling and form
                 interpretation as the two main tasks involved in form
                 understanding. On both problems, opal advances the
                 state of the art: For form labeling, it combines
                 features from the text, structure, and visual rendering
                 of a Web page. In extensive experiments on the ICQ and
                 TEL-8 benchmarks and a set of 200 modern Web forms,
                 opal outperforms previous approaches for form labeling
                 by a significant margin. For form interpretation, opal
                 uses a schema (or ontology) of forms in a given domain.
                 Thanks to this domain schema, it is able to produce
                 nearly perfect ($ \gg 97 $ \% accuracy in the
                 evaluation domains) form interpretations. Yet, the
                 effort to produce a domain schema is very low, as we
                 provide a datalog-based template language that eases
                 the specification of such schemata and a methodology
                 for deriving a domain schema largely automatically from
                 an existing domain ontology. We demonstrate the value
                 of opal's form interpretations through a light-weight
                 form integration system that successfully translates
                 and distributes master queries to hundreds of forms
                  with no error, yet is implemented with only a
                  handful of translation rules.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bozzon:2013:ESF,
  author =       "Alessandro Bozzon and Marco Brambilla and Stefano Ceri
                 and Davide Mazza",
  title =        "Exploratory search framework for {Web} data sources",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "5",
  pages =        "641--663",
  month =        oct,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0326-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Dec 16 16:57:30 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Exploratory search is an information seeking behavior
                 where users progressively learn about one or more
                 topics of interest; it departs quite radically from
                 traditional keyword-based query paradigms, as it
                 combines querying and browsing of resources, and covers
                 activities such as investigating, evaluating,
                 comparing, and synthesizing retrieved information. In
                 most cases, such activities are enabled by a conceptual
                 description of information in terms of entities and
                 their semantic relationships. Customized Web
                 applications, where few applicative entities and their
                 relationships are embedded within the application
                 logics, typically provide some support to exploratory
                 search, which is, however, specific for a given domain.
                 In this paper, we describe a general-purpose
                 exploratory search framework, i.e., a framework which
                 is neutral to the application logic. Our contribution
                 consists of the formalization of the exploratory search
                 paradigm over Web data sources, accessed by means of
                 services; extracted information is described by means
                 of an entity-relationship schema, which masks the
                 service implementations. Exploratory interaction is
                 supported by a general-purpose user interface including
                 a set of widgets for data exploration, from big tables
                 to atomic tables, visual diagrams, and geographic maps;
                 the user interaction is translated to queries defined
                  in SeCoQL, a SQL-like language and protocol specifically
                 designed for supporting exploratory search over data
                 sources. We illustrate the software architecture of our
                 prototype, which uses the interplay of a query and
                 result management system with an orchestrator, capable
                 of incrementally building queries and of walking
                 through the past navigation history. The distinctive
                 feature of the framework is the ability to extract top
                 solutions, which combine top-ranked entity instances.
                 We evaluate exploratory search from the end-user
                 perspective in the context of a cognitive model for
                 search, by studying the user's behavior and the
                 effectiveness of exploratory search in terms of quality
                 of results produced by the search process; we also
                 compare the effectiveness of interaction in using our
                 multi-domain search system with the use of various
                 replicas of the system, each acting upon a single
                 domain, and with the use of conventional search
                 engines.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Demartini:2013:LSL,
  author =       "Gianluca Demartini and Djellel Eddine Difallah and
                 Philippe Cudr{\'e}-Mauroux",
  title =        "Large-scale linked data integration using
                 probabilistic reasoning and crowdsourcing",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "5",
  pages =        "665--687",
  month =        oct,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0324-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Dec 16 16:57:30 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We tackle the problems of semiautomatically matching
                 linked data sets and of linking large collections of
                 Web pages to linked data. Our system, ZenCrowd, (1)
                 uses a three-stage blocking technique in order to
                 obtain the best possible instance matches while
                 minimizing both computational complexity and latency,
                 and (2) identifies entities from natural language text
                 using state-of-the-art techniques and automatically
                 connects them to the linked open data cloud. First, we
                 use structured inverted indices to quickly find
                 potential candidate results from entities that have
                 been indexed in our system. Our system then analyzes
                 the candidate matches and refines them whenever deemed
                 necessary using computationally more expensive queries
                 on a graph database. Finally, we resort to human
                 computation by dynamically generating crowdsourcing
                 tasks in case the algorithmic components fail to come
                 up with convincing results. We integrate all results
                 from the inverted indices, from the graph database and
                 from the crowd using a probabilistic framework in order
                 to make sensible decisions about candidate matches and
                 to identify unreliable human workers. In the following,
                 we give an overview of the architecture of our system
                 and describe in detail our novel three-stage blocking
                 technique and our probabilistic decision framework. We
                 also report on a series of experimental results on a
                 standard data set, showing that our system can achieve
                 a 95 \% average accuracy on instance matching (as
                 compared to the initial 88 \% average accuracy of the
                 purely automatic baseline) while drastically limiting
                 the amount of work performed by the crowd. The
                 experimental evaluation of our system on the entity
                 linking task shows an average relative improvement of
                 14 \% over our best automatic approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sagi:2013:SMP,
  author =       "Tomer Sagi and Avigdor Gal",
  title =        "Schema matching prediction with applications to data
                 source discovery and dynamic ensembling",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "5",
  pages =        "689--710",
  month =        oct,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0325-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Dec 16 16:57:30 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Web-scale data integration involves fully automated
                 efforts which lack knowledge of the exact match between
                 data descriptions. In this paper, we introduce schema
                 matching prediction, an assessment mechanism to support
                 schema matchers in the absence of an exact match. Given
                 attribute pair-wise similarity measures, a predictor
                 predicts the success of a matcher in identifying
                 correct correspondences. We present a comprehensive
                 framework in which predictors can be defined, designed,
                 and evaluated. We formally define schema matching
                 evaluation and schema matching prediction using
                 similarity spaces and discuss a set of four desirable
                 properties of predictors, namely correlation,
                 robustness, tunability, and generalization. We present
                 a method for constructing predictors, supporting
                 generalization, and introduce prediction models as
                 means of tuning prediction toward various quality
                 measures. We define the empirical properties of
                 correlation and robustness and provide concrete
                 measures for their evaluation. We illustrate the
                 usefulness of schema matching prediction by presenting
                 three use cases: We propose a method for ranking the
                 relevance of deep Web sources with respect to given
                 user needs. We show how predictors can assist in the
                 design of schema matching systems. Finally, we show how
                 prediction can support dynamic weight setting of
                 matchers in an ensemble, thus improving upon current
                 state-of-the-art weight setting methods. An extensive
                 empirical evaluation shows the usefulness of predictors
                 in these use cases and demonstrates the usefulness of
                 prediction models in increasing the performance of
                 schema matching.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lee:2013:HEC,
  author =       "Jongwuk Lee and Hyunsouk Cho and Jin-Woo Park and
                 Young-Rok Cha and Seung-Won Hwang and Zaiqing Nie and
                 Ji-Rong Wen",
  title =        "Hybrid entity clustering using crowds and data",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "5",
  pages =        "711--726",
  month =        oct,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0328-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Dec 16 16:57:30 MST 2013",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Query result clustering has attracted considerable
                 attention as a means of providing users with a concise
                 overview of results. However, little research effort
                 has been devoted to organizing the query results for
                 entities which refer to real-world concepts, e.g.,
                 people, products, and locations. Entity-level result
                 clustering is more challenging because diverse
                 similarity notions between entities need to be
                 supported in heterogeneous domains, e.g., image
                 resolution is an important feature for cameras, but not
                 for fruits. To address this challenge, we propose a
                 hybrid relationship clustering algorithm, called Hydra,
                 using co-occurrence and numeric features. Algorithm
                 Hydra captures diverse user perceptions from
                 co-occurrence and disambiguates different senses using
                 feature-based similarity. In addition, we extend Hydra
                 into $ \mathsf {Hydra}_\mathsf {gData} $ with different
                 sources, i.e., entity types and crowdsourcing.
                 Experimental results show that the proposed algorithms
                 achieve effectiveness and efficiency in real-life and
                 synthetic datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhao:2013:EPG,
  author =       "Xiang Zhao and Chuan Xiao and Xuemin Lin and Wei Wang
                 and Yoshiharu Ishikawa",
  title =        "Efficient processing of graph similarity queries with
                 edit distance constraints",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "6",
  pages =        "727--752",
  month =        dec,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0306-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:45 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Graphs are widely used to model complicated data
                 semantics in many applications in bioinformatics,
                 chemistry, social networks, pattern recognition, etc. A
                 recent trend is to tolerate noise arising from various
                 sources such as erroneous data entries and find
                 similarity matches. In this paper, we study graph
                 similarity queries with edit distance constraints.
                  Inspired by the $q$-gram idea for string similarity
                 problems, our solution extracts paths from graphs as
                 features for indexing. We establish a lower bound of
                 common features to generate candidates. Efficient
                 algorithms are proposed to handle three types of graph
                 similarity queries by exploiting both matching and
                 mismatching features as well as degree information to
                 improve the filtering and verification on candidates.
                  We demonstrate that the proposed algorithms significantly
                 outperform existing approaches with extensive
                 experiments on real and synthetic datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
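
%%% The abstract credits the $q$-gram idea from string similarity as
%%% the inspiration for its path-based graph features. For strings,
%%% the corresponding count filter is easy to state: one edit destroys
%%% at most $q$ of the $\max(|s|,|t|) - q + 1$ grams, so only pairs
%%% passing the bound below need expensive verification:
%%%
%%%     from collections import Counter
%%%
%%%     def qgrams(s, q=2):
%%%         return [s[i:i + q] for i in range(len(s) - q + 1)]
%%%
%%%     def passes_count_filter(s, t, tau, q=2):
%%%         # necessary condition for edit_distance(s, t) <= tau
%%%         common = sum((Counter(qgrams(s, q)) &
%%%                       Counter(qgrams(t, q))).values())
%%%         return common >= max(len(s), len(t)) - q + 1 - q * tau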

@Article{Gemulla:2013:NUI,
  author =       "Rainer Gemulla and Peter J. Haas and Wolfgang Lehner",
  title =        "Non-uniformity issues and workarounds in bounded-size
                 sampling",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "6",
  pages =        "753--772",
  month =        dec,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0307-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:45 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A variety of schemes have been proposed in the
                 literature to speed up query processing and analytics
                 by incrementally maintaining a bounded-size uniform
                 sample from a dataset in the presence of a sequence of
                 insertion, deletion, and update transactions. These
                 algorithms vary according to whether the dataset is an
                 ordinary set or a multiset and whether the transaction
                 sequence consists only of insertions or can include
                 deletions and updates. We report on subtle
                 non-uniformity issues that we found in a number of
                 these prior bounded-size sampling schemes, including
                 some of our own. We provide workarounds that can avoid
                 the non-uniformity problem; these workarounds are easy
                 to implement and incur negligible additional cost. We
                 also consider the impact of non-uniformity in practice
                 and describe simple statistical tests that can help
                 detect non-uniformity in new algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Whang:2013:JER,
  author =       "Steven Euijong Whang and Hector Garcia-Molina",
  title =        "Joint entity resolution on multiple datasets",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "6",
  pages =        "773--795",
  month =        dec,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0308-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:45 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Entity resolution (ER) is the problem of identifying
                 which records in a database represent the same entity.
                 Often, records of different types are involved (e.g.,
                 authors, publications, institutions, venues), and
                 resolving records of one type can impact the resolution
                 of other types of records. In this paper we propose a
                 flexible, modular resolution framework where existing
                 ER algorithms developed for a given record type can be
                 plugged in and used in concert with other ER
                 algorithms. Our approach also makes it possible to run
                 ER on subsets of similar records at a time, which is
                 important when the full data are too large to
                 resolve together.
                 We study the scheduling and coordination of the
                 individual ER algorithms, in order to resolve the full
                 dataset, and show the scalability of our approach. We
                 also introduce a ``state-based'' training technique
                 where each ER algorithm is trained for the particular
                 execution context (relative to other types of records)
                 where it will be used.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Xu:2013:DPH,
  author =       "Jia Xu and Zhenjie Zhang and Xiaokui Xiao and Yin Yang
                 and Ge Yu and Marianne Winslett",
  title =        "Differentially private histogram publication",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "6",
  pages =        "797--822",
  month =        dec,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0309-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:45 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Differential privacy (DP) is a promising scheme for
                 releasing the results of statistical queries on
                 sensitive data, with strong privacy guarantees against
                 adversaries with arbitrary background knowledge.
                 Existing studies on differential privacy mostly focus
                 on simple aggregations such as counts. This paper
                 investigates the publication of DP-compliant
                 histograms, which are an important analytical tool for
                 showing the distribution of a random variable, e.g.,
                 hospital bill size for certain patients. Compared to
                 simple aggregations whose results are purely numerical,
                 a histogram query is inherently more complex, since it
                 must also determine its structure, i.e., the ranges of
                 the bins. As we demonstrate in the paper, a
                 DP-compliant histogram with finer bins may actually
                 lead to significantly lower accuracy than a coarser
                 one, since the former requires stronger perturbations
                 in order to satisfy DP. Moreover, the histogram
                 structure itself may reveal sensitive information,
                 which further complicates the problem. Motivated by
                 this, we propose two novel mechanisms, namely
                 NoiseFirst and StructureFirst, for computing
                 DP-compliant histograms. Their main difference lies in
                 the relative order of the noise injection and the
                 histogram structure computation steps. NoiseFirst has
                 the additional benefit that it can improve the accuracy
                 of an already published DP-compliant histogram computed
                 using a naive method. For each of the proposed mechanisms,
                 we design algorithms for computing the optimal
                 histogram structure with two different objectives:
                 minimizing the mean square error and the mean absolute
                 error, respectively. Going one step further, we extend
                 both mechanisms to answer arbitrary range queries.
                 Extensive experiments, using several real datasets,
                 confirm that our two proposals output highly accurate
                 query answers and consistently outperform existing
                 competitors.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fink:2013:AAP,
  author =       "Robert Fink and Jiewen Huang and Dan Olteanu",
  title =        "Anytime approximation in probabilistic databases",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "6",
  pages =        "823--848",
  month =        dec,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0310-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:45 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This article describes an approximation algorithm for
                 computing the probability of propositional formulas
                 over discrete random variables. It incrementally
                 refines lower and upper bounds on the probability of
                 the formulas until the desired absolute or relative
                 error guarantee is reached. This algorithm is used by
                 the SPROUT query engine to approximate the
                 probabilities of results to relational algebra queries
                 on expressive probabilistic databases.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Drosou:2013:YER,
  author =       "Marina Drosou and Evaggelia Pitoura",
  title =        "{YmalDB}: exploring relational databases via
                 result-driven recommendations",
  journal =      j-VLDB-J,
  volume =       "22",
  number =       "6",
  pages =        "849--874",
  month =        dec,
  year =         "2013",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0311-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:45 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The typical user interaction with a database system is
                 through queries. However, users often do not have
                 a clear understanding of their information needs or the
                 exact content of the database. In this paper, we
                 propose assisting users in database exploration by
                 recommending to them additional items, called Ymal
                 (``You May Also Like'') results, that, although not
                 part of the result of their original query, appear to
                 be highly related to it. Such items are computed based
                 on the most interesting sets of attribute values,
                 called faSets, that appear in the result of the
                 original query. The interestingness of a faSet is
                 defined based on its frequency in the query result and
                 in the database. Database frequency estimations rely on
                 a novel approach of maintaining a set of representative
                 rare faSets. We have implemented our approach and
                 report results regarding both its performance and its
                 usefulness.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Johnson:2014:EUC,
  author =       "Ryan Johnson and Ippokratis Pandis and Anastasia
                 Ailamaki",
  title =        "Eliminating unscalable communication in transaction
                 processing",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "1",
  pages =        "1--23",
  month =        feb,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0312-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:46 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Multicore hardware demands software parallelism.
                 Transaction processing workloads typically exhibit high
                 concurrency, and, thus, provide ample opportunities for
                 parallel execution. Unfortunately, because of the
                 characteristics of the application, transaction
                 processing systems must moderate and coordinate
                 communication between independent agents, since it is
                 notoriously difficult to implement high-performing
                 transaction processing systems that incur no
                 communication whatsoever. As a result, transaction
                 processing systems cannot always convert abundant, even
                 embarrassing, request-level parallelism into execution
                 parallelism due to communication bottlenecks.
                 Transaction processing system designers must therefore
                 find ways to achieve scalability while still allowing
                 communication to occur. To this end, we identify three
                 forms of communication in the system--unbounded,
                 fixed, and cooperative--and argue that only the first
                 type poses a fundamental threat to scalability. The
                 other two types tend not to impose obstacles to
                 scalability, though they may reduce single-thread
                 performance. We argue that proper analysis of
                 communication patterns in any software system is a
                 powerful tool for improving the system's scalability.
                 Then, we present and evaluate under a common framework
                 techniques that attack significant sources of unbounded
                 communication during transaction processing and sketch
                 a solution for those that remain. The solutions we
                 present affect fundamental services of any transaction
                 processing engine, such as locking, logging, physical
                 page accesses, and buffer pool frame accesses. They
                 either reduce such communication through caching,
                 downgrade it to a less-threatening type, or eliminate
                 it completely through system design. We find that the
                 latter technique, revisiting the transaction processing
                 architecture, is the most effective. The final design
                 cuts unbounded communication by roughly an order of
                 magnitude compared with the baseline, while exhibiting
                 better scalability on multicore machines.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhou:2014:EQP,
  author =       "Junfeng Zhou and Zhifeng Bao and Wei Wang and Jinjia
                 Zhao and Xiaofeng Meng",
  title =        "Efficient query processing for {XML} keyword queries
                 based on the {IDList} index",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "1",
  pages =        "25--50",
  month =        feb,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0313-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:46 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Keyword search over XML data has attracted a lot of
                 research efforts in the last decade, where one of the
                 fundamental research problems is how to efficiently
                 answer a given keyword query w.r.t. a certain query
                 semantics. We found that the key factor resulting in
                 the inefficiency for existing methods is that they all
                 heavily suffer from the common-ancestor-repetition
                 problem. In this paper, we propose a novel form of
                 inverted list, namely the IDList; the IDList for
                 keyword $k$ consists of ordered nodes that directly or
                 indirectly contain $k$. We then show that finding
                 keyword query results based on the smallest lowest
                 common ancestor and exclusive lowest common ancestor
                 semantics can be reduced to the ordered set intersection
                 problem, which has been heavily optimized due to its
                 application in areas such as information retrieval and
                 database systems. We propose several algorithms that
                 exploit set intersection in different directions and
                 with or without using additional indexes. We further
                 propose several algorithms that are based on hash
                 search to simplify the operation of finding common
                 nodes from all involved IDLists. We have conducted an
                 extensive set of experiments using many
                 state-of-the-art algorithms and several large-scale
                 datasets. The results demonstrate that our proposed
                 methods outperform existing methods by up to two orders
                 of magnitude in many cases.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Slavov:2014:GBA,
  author =       "Vasil Slavov and Praveen Rao",
  title =        "A gossip-based approach for {Internet}-scale
                 cardinality estimation of {XPath} queries over
                 distributed semistructured data",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "1",
  pages =        "51--76",
  month =        feb,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0314-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:46 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we address the problem of cardinality
                 estimation of XPath queries over XML data stored in a
                 distributed, Internet-scale environment such as a
                 large-scale, data sharing system designed to foster
                 innovations in biomedical and health informatics. The
                 cardinality estimate of XPath expressions is useful in
                 XQuery optimization, designing IR-style relevance
                 ranking schemes, and statistical hypothesis testing. We
                 present a novel gossip algorithm called XGossip, which
                 given an XPath query estimates the number of XML
                 documents in the network that contain a match for the
                 query. XGossip is designed to be scalable,
                 decentralized, and robust to failures--properties that
                 are desirable in a large-scale distributed system.
                 XGossip employs a novel divide-and-conquer strategy for
                 load balancing and reducing the bandwidth consumption.
                 We conduct theoretical analysis of XGossip in terms of
                 accuracy of cardinality estimation, message complexity,
                 and bandwidth consumption. We present a comprehensive
                 performance evaluation of XGossip on Amazon EC2 using a
                 heterogeneous collection of XML documents.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Whang:2014:IER,
  author =       "Steven Euijong Whang and Hector Garcia-Molina",
  title =        "Incremental entity resolution on rules and data",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "1",
  pages =        "77--102",
  month =        feb,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0315-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:46 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Entity resolution (ER) identifies database records
                 that refer to the same real-world entity. In practice,
                 ER is not a one-time process, but is constantly
                 improved as the data, schema and application are better
                 understood. We first address the problem of keeping the
                 ER result up-to-date when the ER logic or data
                 ``evolve'' frequently. A na{\"\i}ve approach that
                 re-runs ER from scratch may not be tolerable for
                 resolving large datasets. This paper investigates when
                 and how we can instead exploit previous
                 ``materialized'' ER results to save redundant work with
                 evolved logic and data. We introduce algorithm
                 properties that facilitate evolution, and we propose
                 efficient rule and data evolution techniques for three
                 ER models: match-based clustering (records are
                 clustered based on Boolean matching information),
                 distance-based clustering (records are clustered based
                 on relative distances), and pairs ER (the pairs of
                 matching records are identified). Using real datasets,
                 we illustrate the cost of materializations and the
                 potential gains of evolution over the na{\"\i}ve
                 approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Beskales:2014:SRC,
  author =       "George Beskales and Ihab F. Ilyas and Lukasz Golab and
                 Artur Galiullin",
  title =        "Sampling from repairs of conditional functional
                 dependency violations",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "1",
  pages =        "103--128",
  month =        feb,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0316-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:46 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Violations of functional dependencies (FDs) and
                 conditional functional dependencies (CFDs) are common
                 in practice, often indicating deviations from the
                 intended data semantics. These violations arise in many
                 contexts such as data integration and Web data
                 extraction. Resolving these violations is challenging
                 for a variety of reasons, one of them being the
                 exponential number of possible repairs. Most of the
                 previous work has tackled this problem by producing a
                 single repair that is nearly optimal with respect to
                 some metric. In this paper, we propose a novel data
                 cleaning approach that is not limited to finding a
                 single repair, namely sampling from the space of
                 possible repairs. We give several motivating scenarios
                 where sampling from the space of CFD repairs is
                 desirable, we propose a new class of useful repairs,
                 and we present an algorithm that randomly samples from
                 this space in an efficient way. We also show how to
                 restrict the space of repairs based on constraints that
                 reflect the accuracy of different parts of the
                 database. We experimentally evaluate our algorithms
                 against previous approaches to show the utility and
                 efficiency of our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lee:2014:TEM,
  author =       "Jongwuk Lee and Seung-Won Hwang",
  title =        "Toward efficient multidimensional subspace skyline
                 computation",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "1",
  pages =        "129--145",
  month =        feb,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0317-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:46 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Skyline queries have attracted considerable attention
                 to assist multicriteria analysis of large-scale
                 datasets. In this paper, we focus on multidimensional
                 subspace skyline computation, for which two approaches
                 have been actively studied. First, to narrow down a
                 full-space skyline, users may consider multiple
                 subspace skylines reflecting their interest. For this
                 purpose, we tackle the concept of a skycube, which
                 consists of all possible non-empty subspace skylines in
                 a given full space. Second, to understand diverse
                 semantics of subspace skylines, we address skyline
                 groups in which a skyline point (or a set of skyline
                 points) is annotated with decisive subspaces. Our
                 primary contributions are to identify common building
                 blocks of the two approaches and to develop orthogonal
                 optimization principles that benefit both approaches.
                 Our experimental results show the efficiency of
                 proposed algorithms by comparing them with
                 state-of-the-art algorithms in both synthetic and
                 real-life datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zellag:2014:CAM,
  author =       "Kamal Zellag and Bettina Kemme",
  title =        "Consistency anomalies in multi-tier architectures:
                 automatic detection and prevention",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "1",
  pages =        "147--172",
  month =        feb,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0318-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Feb 13 09:58:46 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Modern transaction systems, consisting of an
                 application server tier and a database tier, offer
                 several levels of isolation providing a trade-off
                 between performance and consistency. While it is fairly
                 well known how to identify qualitatively the anomalies
                 that are possible under a certain isolation level, it
                 is much more difficult to detect and quantify such
                 anomalies during run-time of a given application. In
                 this paper, we present a new approach to detect and
                 quantify consistency anomalies for an arbitrary
                 multi-tier application running under any isolation
                 level ensuring
                 at least read committed. In fact, the application can
                 run even under a mixture of isolation levels. Our
                 detection approach can be online or off-line and for
                 each detected anomaly, we identify exactly the
                 transactions and data items involved. Furthermore, we
                 classify the detected anomalies into patterns showing
                 the business methods involved as well as analyzing the
                 types of cycles that occur. Our approach can help
                 designers to either choose an isolation level where the
                 anomalies do not occur or to change the transaction
                 design to avoid the anomalies. Furthermore, we provide
                 an option in which the occurrence of anomalies can be
                 automatically reduced during run-time. To test the
                 effectiveness and efficiency of our approach, we have
                 conducted a set of experiments using a wide range of
                 benchmarks.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ozsoyoglu:2014:SIB,
  author =       "Z. Meral {\"O}zsoyo{\u{g}}lu and U{\u{g}}ur
                 {\c{C}}etintemel and Nilesh Dalvi and Hank Korth and
                 Anthony Tung",
  title =        "Special issue on best papers of {VLDB 2012}",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "2",
  pages =        "173--174",
  month =        apr,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0356-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 26 17:19:12 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-014-0356-z;
                 http://link.springer.com/content/pdf/10.1007/s00778-014-0356-z.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://link.springer.com/journal/778",
}

@Article{Angel:2014:DSM,
  author =       "Albert Angel and Nick Koudas and Nikos Sarkas and
                 Divesh Srivastava and Michael Svendsen and Srikanta
                 Tirthapura",
  title =        "Dense subgraph maintenance under streaming edge weight
                 updates for real-time story identification",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "2",
  pages =        "175--199",
  month =        apr,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0340-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 26 17:19:12 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-013-0340-z",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://link.springer.com/journal/778",
}

@Article{Das:2014:EFE,
  author =       "Mahashweta Das and Saravanan Thirumuruganathan and
                 Sihem Amer-Yahia and Gautam Das and Cong Yu",
  title =        "An expressive framework and efficient algorithms for
                 the analysis of collaborative tagging",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "2",
  pages =        "201--226",
  month =        apr,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0341-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 26 17:19:12 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-013-0341-y",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://link.springer.com/journal/778",
}

@Article{Cheng:2014:EPH,
  author =       "James Cheng and Zechao Shang and Hong Cheng and Haixun
                 Wang and Jeffrey Xu Yu",
  title =        "Efficient processing of $k$-hop reachability queries",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "2",
  pages =        "227--252",
  month =        apr,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0346-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 26 17:19:12 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-013-0346-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://link.springer.com/journal/778",
}

@Article{Koch:2014:DHO,
  author =       "Christoph Koch and Yanif Ahmad and Oliver Kennedy and
                 Milos Nikolic and Andres N{\"o}tzli and Daniel Lupei
                 and Amir Shaikhha",
  title =        "{DBToaster}: higher-order delta processing for
                 dynamic, frequently fresh views",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "2",
  pages =        "253--278",
  month =        apr,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0348-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 26 17:19:12 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-013-0348-4",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://link.springer.com/journal/778",
}

@Article{Bailis:2014:QEC,
  author =       "Peter Bailis and Shivaram Venkataraman and Michael J.
                 Franklin and Joseph M. Hellerstein and Ion Stoica",
  title =        "Quantifying eventual consistency with {PBS}",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "2",
  pages =        "279--302",
  month =        apr,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0330-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 26 17:19:12 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-013-0330-1",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://link.springer.com/journal/778",
}

@Article{Graefe:2014:TSA,
  author =       "Goetz Graefe and Felix Halim and Stratos Idreos and
                 Harumi Kuno and Stefan Manegold and Bernhard Seeger",
  title =        "Transactional support for adaptive indexing",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "2",
  pages =        "303--328",
  month =        apr,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0345-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 26 17:19:12 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-013-0345-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://link.springer.com/journal/778",
}

@Article{Zhang:2014:TCE,
  author =       "Ning Zhang and Junichi Tatemura and Jignesh M. Patel
                 and Hakan Hacigumus",
  title =        "Toward cost-effective storage provisioning for
                 {DBMSs}",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "2",
  pages =        "329--354",
  month =        apr,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0334-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 26 17:19:12 MDT 2016",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-013-0334-x",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://link.springer.com/journal/778",
}

@Article{Doulkeridis:2014:SLS,
  author =       "Christos Doulkeridis and Kjetil N{\o}rv{\aa}g",
  title =        "A survey of large-scale analytical query processing in
                 {MapReduce}",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "3",
  pages =        "355--380",
  month =        jun,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0319-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 29 06:13:52 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Enterprises today acquire vast volumes of data from
                 different sources and leverage this information by
                 means of data analysis to support effective
                 decision-making and provide new functionality and
                 services. The key requirement of data analytics is
                 scalability, simply due to the immense volume of data
                 that need to be extracted, processed, and analyzed in a
                 timely fashion. Arguably the most popular framework for
                 contemporary large-scale data analytics is MapReduce,
                 mainly due to its salient features that include
                 scalability, fault-tolerance, ease of programming, and
                 flexibility. However, despite its merits, MapReduce has
                 evident performance limitations in miscellaneous
                 analytical tasks, and this has given rise to a
                 significant body of research that aims at improving its
                 efficiency, while maintaining its desirable properties.
                 This survey aims to review the state of the art in
                 improving the performance of parallel query processing
                 using MapReduce. A set of the most significant
                 weaknesses and limitations of MapReduce is discussed at
                 a high level, along with techniques for addressing
                 them. A taxonomy
                 is presented for categorizing existing research on
                 MapReduce improvements according to the specific
                 problem they target. Based on the proposed taxonomy, a
                 classification of existing research is provided
                 focusing on the optimization objective. Concluding, we
                 outline interesting directions for future parallel data
                 processing systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhou:2014:EDT,
  author =       "Xiangmin Zhou and Lei Chen",
  title =        "Event detection over {Twitter} social media streams",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "3",
  pages =        "381--400",
  month =        jun,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0320-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 29 06:13:52 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In recent years, microblogs have become an important
                 source for reporting real-world events. A real-world
                 occurrence reported in microblogs is also called a
                 social event. Social events may hold critical materials
                 that describe the situations during a crisis. In real
                 applications, such as crisis management and decision
                 making, monitoring the critical events over social
                 streams will enable watch officers to analyze a whole
                 situation that is a composite event, and make the right
                 decision based on the detailed contexts such as what is
                 happening, where an event is happening, and who are
                 involved. Although there has been significant research
                 effort on detecting a target event in social networks
                 based on a single source, in crisis, we often want to
                 analyze the composite events contributed by different
                 social users. So far, the problem of integrating
                 ambiguous views from different users is not well
                 investigated. To address this issue, we propose a novel
                 framework to detect composite social events over
                 streams, which fully exploits the information of social
                 data over multiple dimensions. Specifically, we first
                 propose a graphical model called location-time
                 constrained topic (LTT) to capture the content, time,
                 and location of social messages. Using LTT, a social
                 message is represented as a probability distribution
                 over a set of topics by inference, and the similarity
                 between two messages is measured by the distance
                 between their distributions. Then, the events are
                 identified by conducting efficient similarity joins
                 over social media streams. To accelerate the similarity
                 join, we also propose a variable dimensional extendible
                 hash over social streams. We have conducted extensive
                 experiments to prove the high effectiveness and
                 efficiency of the proposed approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hung:2014:QTB,
  author =       "Ho Hoang Hung and Sourav S. Bhowmick and Ba Quan
                 Truong and Byron Choi and Shuigeng Zhou",
  title =        "{QUBLE}: towards blending interactive visual subgraph
                 search queries on large networks",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "3",
  pages =        "401--426",
  month =        jun,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0322-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 29 06:13:52 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In a previous paper, we laid out the vision of a novel
                 graph query processing paradigm where instead of
                 processing a visual query graph after its construction,
                 it interleaves visual query formulation and processing
                 by exploiting the latency offered by the GUI to
                 filter irrelevant matches and prefetch partial query
                 results [8]. Our recent attempts at implementing this
                 vision [8, 9] show significant improvement in system
                 response time (SRT) for subgraph queries. However, these efforts are
                 designed specifically for graph databases containing a
                 large collection of small or medium-sized graphs. In
                 this paper, we propose a novel algorithm called QUBLE
                 (QUery Blender for Large nEtworks) to realize this
                 visual subgraph querying paradigm on very large
                 networks (e.g., protein interaction networks, social
                 networks). First, it decomposes a large network into a
                 set of graphlets and supergraphlets using a minimum
                 cut-based graph partitioning technique. Next, it mines
                 approximate frequent and small infrequent fragments
                 (SIFs) from them and identifies their occurrences in
                 these graphlets and supergraphlets. Then, the indexing
                 framework of [9] is enhanced so that the mined
                 fragments can be exploited to index graphlets for
                 efficient blending of visual subgraph query formulation
                 and query processing. Extensive experiments on large
                 networks demonstrate the effectiveness of QUBLE.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Teodoro:2014:ASS,
  author =       "George Teodoro and Eduardo Valle and Nathan Mariano
                 and Ricardo Torres and Wagner {Meira, Jr.} and Joel
                 H. Saltz",
  title =        "Approximate similarity search for online multimedia
                 services on distributed {CPU--GPU} platforms",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "3",
  pages =        "427--448",
  month =        jun,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0329-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 29 06:13:52 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Similarity search in high-dimensional spaces is a
                 pivotal operation for several database applications,
                 including online content-based multimedia services.
                 With the increasing popularity of multimedia
                 applications, these services are facing new challenges
                 regarding (1) the very large and growing volumes of
                 data to be indexed/searched and (2) the necessity of
                 reducing the response times as observed by end-users.
                 In addition, the nature of the interactions between
                 users and online services creates fluctuating query
                 request rates throughout execution, which requires a
                 similarity search engine to adapt to better use the
                 computation platform and minimize response times. In
                 this work, we address these challenges with
                 Hypercurves, a flexible framework for answering
                 approximate k-nearest neighbor (kNN) queries for very
                 large multimedia databases. Hypercurves executes in
                 hybrid CPU--GPU environments and is able to attain
                 massive query-processing rates through the cooperative
                 use of these devices. Hypercurves also changes its
                 CPU--GPU task partitioning dynamically according to
                 the observed load, aiming for optimal response times.
                 In our empirical evaluation, dynamic task partitioning
                 reduced query response times by approximately 50\%
                 compared to the best static task partition. Due to a
                 probabilistic proof of equivalence to the sequential
                 kNN algorithm, the CPU--GPU execution of Hypercurves
                 in distributed (multi-node) environments can be
                 aggressively optimized, attaining superlinear
                 scalability while still guaranteeing, with high
                 probability, results at least as good as those from the
                 sequential algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Shang:2014:PTM,
  author =       "Shuo Shang and Ruogu Ding and Kai Zheng and Christian
                 S. Jensen and Panos Kalnis and Xiaofang Zhou",
  title =        "Personalized trajectory matching in spatial networks",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "3",
  pages =        "449--468",
  month =        jun,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0331-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 29 06:13:52 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the increasing availability of moving-object
                 tracking data, trajectory search and matching is
                 increasingly important. We propose and investigate a
                 novel problem called personalized trajectory matching
                 (PTM). In contrast to conventional trajectory
                 similarity search by spatial distance only, PTM takes
                 into account the significance of each sample point in a
                 query trajectory. A PTM query takes a trajectory with
                 user-specified weights for each sample point in the
                 trajectory as its argument. It returns the trajectory
                 in an argument data set with the highest similarity to
                 the query trajectory. We believe that this type of
                 query may bring significant benefits to users in many
                 popular applications such as route planning,
                 carpooling, friend recommendation, traffic analysis,
                 urban computing, and location-based services in
                 general. PTM query processing faces two challenges: how
                 to prune the search space during the query processing
                 and how to schedule multiple so-called expansion
                 centers effectively. To address these challenges, a
                 novel two-phase search algorithm is proposed that
                 carefully selects a set of expansion centers from the
                 query trajectory and exploits upper and lower bounds to
                 prune the search space in the spatial and temporal
                 domains. An efficiency study reveals that the algorithm
                 explores the minimum search space in both domains.
                 Second, a heuristic search strategy based on priority
                 ranking is developed to schedule the multiple expansion
                 centers, which can further prune the search space and
                 enhance the query efficiency. The performance of the
                 PTM query is studied in extensive experiments based on
                 real and synthetic trajectory data sets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Richter:2014:TZO,
  author =       "Stefan Richter and Jorge-Arnulfo Quian{\'e}-Ruiz and
                 Stefan Schuh and Jens Dittrich",
  title =        "Towards zero-overhead static and adaptive indexing in
                 {Hadoop}",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "3",
  pages =        "469--494",
  month =        jun,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0332-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 29 06:13:52 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Hadoop MapReduce has evolved to an important industry
                 standard for massive parallel data processing and has
                 become widely adopted for a variety of use-cases.
                 Recent works have shown that indexes can improve the
                 performance of selective MapReduce jobs dramatically.
                 However, one major weakness of existing approaches is
                 high index creation costs. We present HAIL (Hadoop
                 Aggressive Indexing Library), a novel indexing approach
                 for HDFS and Hadoop MapReduce. HAIL creates different
                 clustered indexes over terabytes of data with minimal,
                 often invisible costs, and it dramatically improves
                 runtimes of several classes of MapReduce jobs. HAIL
                 features two different indexing pipelines, static
                 indexing and adaptive indexing. HAIL static indexing
                 efficiently indexes datasets while uploading them to
                 HDFS. Thereby, HAIL leverages the default replication
                 of Hadoop and enhances it with logical replication.
                 This allows HAIL to create multiple clustered indexes
                 for a dataset, e.g., one for each physical replica.
                 Still, in terms of upload time, HAIL matches or even
                 improves over the performance of standard HDFS.
                 Additionally, HAIL adaptive indexing allows for
                 automatic, incremental indexing at job runtime with
                 minimal runtime overhead. For example, HAIL adaptive
                 indexing can completely index a dataset as a byproduct of
                 only four MapReduce jobs while incurring an overhead as
                 low as 11\% for the very first of those jobs only. In
                 our experiments, we show that HAIL improves job
                 runtimes by up to $ 68 \times $ over Hadoop. This
                 article is an extended version of the VLDB 2012 paper
                 (Dittrich et al. in PVLDB 5(11):1591--1602, 2012).",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Meier:2014:BR,
  author =       "Michael Meier",
  title =        "The backchase revisited",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "3",
  pages =        "495--516",
  month =        jun,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0333-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 29 06:13:52 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Semantic query optimization is the process of finding
                 equivalent rewritings of an input query given
                 constraints that hold in a database instance. In this
                 paper, we report about a Chase \& Backchase (C\&B)
                 algorithm strategy that generalizes and improves on
                 well-known methods in the field. The implementation of
                 our approach, the Pegasus system, outperforms existing
                 C\&B systems on average by two orders of magnitude.
                 This gain in performance is due to a combination of
                 novel methods that lower the complexity in practical
                 situations significantly.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gedik:2014:PFS,
  author =       "Bugra Gedik",
  title =        "Partitioning functions for stateful data parallelism
                 in stream processing",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "4",
  pages =        "517--539",
  month =        aug,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0335-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 16 17:57:07 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we study partitioning functions for
                 stream processing systems that employ stateful data
                 parallelism to improve application throughput. In
                 particular, we develop partitioning functions that are
                 effective under workloads where the domain of the
                 partitioning key is large and its value distribution is
                 skewed. We define various desirable properties for
                 partitioning functions, ranging from balance properties
                 such as memory, processing, and communication balance,
                 structural properties such as compactness and fast
                 lookup, and adaptation properties such as fast
                 computation and minimal migration. We introduce a
                 partitioning function structure that is compact and
                 develop several associated heuristic construction
                 techniques that exhibit good balance and low migration
                 cost under skewed workloads. We provide experimental
                 results that compare our partitioning functions to more
                 traditional approaches such as uniform and consistent
                 hashing, under different workload and application
                 characteristics, and show superior performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
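
%%% The compact-function idea described above is easy to prototype.
%%% The Python sketch below is illustrative only (the helper names and
%%% the greedy placement rule are assumptions, not the paper's
%%% construction): it hashes most keys uniformly but keeps a small
%%% explicit table that pins a few heavy hitters to the least-loaded
%%% partition, trading a little state (compactness) for balance under
%%% skew.
%%%
%%%    import hashlib
%%%    from collections import Counter
%%%
%%%    def build_partitioner(key_frequencies, num_partitions, hot=16):
%%%        # Pin the `hot` most frequent keys explicitly; greedy
%%%        # least-loaded placement balances the heavy hitters.
%%%        heavy = [k for k, _ in
%%%                 Counter(key_frequencies).most_common(hot)]
%%%        load = [0.0] * num_partitions
%%%        table = {}
%%%        for k in heavy:
%%%            p = min(range(num_partitions), key=load.__getitem__)
%%%            table[k] = p
%%%            load[p] += key_frequencies[k]
%%%
%%%        def partition(key):
%%%            if key in table:      # O(hot) extra state, O(1) lookup
%%%                return table[key]
%%%            digest = hashlib.md5(str(key).encode()).digest()
%%%            return int.from_bytes(digest[:4], "big") % num_partitions
%%%
%%%        return partition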

@Article{Koh:2014:FKM,
  author =       "Jia-Ling Koh and Chen-Yi Lin and Arbee L. Chen",
  title =        "Finding $k$ most favorite products based on
                 reverse top-$t$ queries",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "4",
  pages =        "541--564",
  month =        aug,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0336-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 16 17:57:07 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A reverse top-$t$ query for a product returns a set of
                 customers, named potential customers, who regard the
                 product as one of their top-$t$ favorites. Given a set
                 of customers with different preferences on the features
                 of the products, we want to select at most $k$ products
                 from a pool of candidate products such that their total
                 number of potential customers is maximized. Two
                 versions of the problem are defined according to
                 whether the competitive existing products are given.
                 For solving this NP-hard problem, we first propose an
                 incremental greedy approach to find an approximate
                 solution of the problem with a quality guarantee. To
                 further speed up this basic greedy approach, we
                 exploit several properties of the top-$t$ queries
                 and skyline queries to reduce the solution space of the
                 problem. In addition, an upper bound of the potential
                 customers is estimated to reduce the cost of computing
                 the reverse top-$t$ queries for the candidate
                 products. Finally, when the candidate products are
                 formed from multiple component tables, we propose a
                 strategy to reduce the number of the accessed tuples in
                 the component tables such that only the tuples that are
                 possibly components of the top-$t$ favorites of the
                 customers need to be accessed. By applying these
                 pruning strategies, we propose another faster greedy
                 approach. The experimental results demonstrate that the
                 proposed pruning strategies work very well and make the
                 faster greedy algorithms for both versions of the
                 problem achieve excellent performance on both
                 efficiency and memory utilization.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
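
%%% Once each candidate product's potential customers (its reverse
%%% top-$t$ result) are known, choosing at most $k$ products is an
%%% instance of maximum coverage. A minimal sketch of the incremental
%%% greedy step the abstract refers to (illustrative; the names are
%%% assumptions, and the paper adds pruning on top of this):
%%%
%%%    def greedy_select(potential_customers, k):
%%%        """potential_customers: dict product -> set of customers."""
%%%        chosen, covered = [], set()
%%%        for _ in range(min(k, len(potential_customers))):
%%%            # Pick the product adding the most new customers; this
%%%            # greedy gives the classic (1 - 1/e) approximation.
%%%            best = max(potential_customers,
%%%                       key=lambda p: len(potential_customers[p]
%%%                                         - covered))
%%%            gain = potential_customers[best] - covered
%%%            if not gain:
%%%                break
%%%            chosen.append(best)
%%%            covered |= gain
%%%        return chosen, covered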

@Article{Zou:2014:GGB,
  author =       "Lei Zou and M. Tamer {\"O}zsu and Lei Chen and Xuchuan
                 Shen and Ruizhe Huang and Dongyan Zhao",
  title =        "{gStore}: a graph-based {SPARQL} query engine",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "4",
  pages =        "565--590",
  month =        aug,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0337-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 16 17:57:07 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We address efficient processing of SPARQL queries over
                 RDF datasets. The proposed techniques, incorporated
                 into the gStore system, handle, in a uniform and
                 scalable manner, SPARQL queries with wildcards and
                 aggregate operators over dynamic RDF datasets. Our
                 approach is graph based. We store RDF data as a large
                 graph and also represent a SPARQL query as a query
                 graph. Thus, the query answering problem is converted
                 into a subgraph matching problem. To achieve efficient
                 and scalable query processing, we develop an index,
                 together with effective pruning rules and efficient
                 search algorithms. We propose techniques that use this
                 infrastructure to answer aggregation queries. We also
                 propose an effective maintenance algorithm to handle
                 online updates over RDF repositories. Extensive
                 experiments confirm the efficiency and effectiveness of
                 our solutions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tao:2014:ILW,
  author =       "Yufei Tao and Yi Yang and Xiaocheng Hu and Cheng Sheng
                 and Shuigeng Zhou",
  title =        "Instance-level worst-case query bounds on {R}-trees",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "4",
  pages =        "591--607",
  month =        aug,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0339-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 16 17:57:07 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Even with its significant impacts on the database
                 area, the R-tree is often criticized for its lack of
                 good worst-case guarantees. For example, in range
                 search (where we want to report all the data points in
                 a query rectangle), it is known that on adversely
                 designed datasets and queries, an R-tree can be as slow
                 as a sequential scan that simply reads all the data
                 points. Nevertheless, R-trees work so well on real data
                 that they have been widely implemented in commercial
                 systems. This stark contrast has caused long-term
                 controversy between practitioners and theoreticians as
                 to whether this structure deserves its fame. This paper
                 provides theoretical evidence that, somewhat
                 surprisingly, R-trees are efficient in the worst case
                 for range search on many real datasets. Given any
                 integer $K$, we explain how to obtain an upper bound on
                 the cost of answering all (i.e., infinitely many) range
                 queries retrieving at most $K$ objects. On practical
                 data, the upper bound is only a fraction of the
                 overhead of sequential scan (unless, apparently, $K$ is
                 of the same order as the dataset size). Our upper
                 bounds are tight up to a constant factor, namely they
                 cannot be lowered by more than $O(1)$ times
                 while still capturing the most expensive queries. Our
                 upper bounds can be calculated in constant time by
                 remembering only three integers. These integers, in
                 turn, are generated from only the leaf MBRs of an
                 R-tree, but not the leaf nodes themselves. In practice,
                 the internal nodes are often buffered in memory, so
                 that the integers aforementioned can be efficiently
                 maintained along with the data updates and made
                 available to a query optimizer at any time.
                 Furthermore, our analytical framework introduces
                 instance-level query bound as a new technique for
                 evaluating the efficiency of heuristic structures in a
                 theory-flavored manner (previously, experimentation was
                 the dominant assessment method).",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cicek:2014:ELD,
  author =       "A. Ercument Cicek and Mehmet Ercan Nergiz and Yucel
                 Saygin",
  title =        "Ensuring location diversity in privacy-preserving
                 spatio-temporal data publishing",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "4",
  pages =        "609--625",
  month =        aug,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0342-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 16 17:57:07 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The rise of mobile technologies in the last decade has
                 led to vast amounts of location information generated
                 by individuals. From the knowledge discovery point of
                 view, these data are quite valuable, but the inherent
                 personal information in the data raises privacy
                 concerns. There exist many algorithms in the
                 literature to satisfy the privacy requirements of
                 individuals, by generalizing, perturbing, and
                 suppressing their data. Current techniques that try to
                 ensure a level of indistinguishability between
                 trajectories in a dataset are direct applications of
                 $k$-anonymity and thus suffer from its shortcomings,
                 such as the lack of diversity in sensitive regions.
                 Moreover, these techniques fail to incorporate common
                 background knowledge an adversary might have, such as
                 the underlying map, the traffic density, and the
                 anonymization algorithm itself. We propose a new
                 privacy metric, $p$-confidentiality, that ensures
                 location diversity by bounding the probability of a
                 user visiting a sensitive location with the input
                 parameter $p$. We perform our probabilistic analysis based
                 on the background knowledge of the adversary. Instead
                 of grouping the trajectories, we anonymize the
                 underlying map, that is, we group nodes (points of
                 interest) to create obfuscation areas around sensitive
                 locations. The groups are formed in such a way that the
                 parts of trajectories entering the groups, coupled with
                 the adversary background, do not increase the
                 adversary's belief in violating $p$-confidentiality.
                 We then use the map anonymization as
                 a model to anonymize the trajectories. We prove that
                 our algorithm is resistant to reverse-engineering
                 attacks when the statistics required for map
                 anonymization are publicly available. We empirically
                 evaluate the performance of our algorithm and show that
                 location diversity can be satisfied effectively.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Unterbrunner:2014:HAE,
  author =       "Philipp Unterbrunner and Gustavo Alonso and Donald
                 Kossmann",
  title =        "High availability, elasticity, and strong consistency
                 for massively parallel scans over relational data",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "4",
  pages =        "627--652",
  month =        aug,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0343-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 16 17:57:07 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "An elastic and highly available data store is a key
                 component of many cloud applications. Existing data
                 stores with strong consistency guarantees are designed
                 and optimized for small updates, key-value access, and
                 (if supported) small range queries over a predefined
                 key column. This raises performance and availability
                 problems for applications which inherently require
                 large updates, non-key access, and large range queries.
                 This paper presents a solution to these problems:
                 Crescando/RB, a distributed, scan-based, main-memory
                 relational data store (single table) with robust
                 performance and high availability. The system addresses
                 a real, large-scale industry use case: the Amadeus
                 travel management system. This paper focuses on the
                 distribution layer of Crescando/RB, the problem and
                 theory behind it, the rationale underlying key design
                 decisions, and the novel multicast protocol and
                 replication framework it is composed of. Highlighting
                 the key features of the distribution layer, we present
                 experimental results showing that even under permanent
                 node failures and large-scale data repartitioning,
                 Crescando/RB remains fully available and capable of
                 sustaining a heavy query and update load.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2014:CND,
  author =       "Rui Chen and Benjamin C. Fung and Philip S. Yu and
                 Bipin C. Desai",
  title =        "Correlated network data publication via differential
                 privacy",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "4",
  pages =        "653--676",
  month =        aug,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0344-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Jul 16 17:57:07 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the increasing prevalence of information
                 networks, research on privacy-preserving network data
                 publishing has received substantial attention recently.
                 There are two streams of relevant research, targeting
                 different privacy requirements. A large body of
                 existing works focus on preventing node
                 re-identification against adversaries with structural
                 background knowledge, while some other studies aim to
                 thwart edge disclosure. In general, the line of
                 research on preventing edge disclosure is less
                 fruitful, largely due to lack of a formal privacy
                 model. The recent emergence of differential privacy has
                 shown great promise for rigorous prevention of edge
                 disclosure. Yet recent research indicates that
                 differential privacy is vulnerable to data correlation,
                 which hinders its application to network data that may
                 be inherently correlated. In this paper, we show that
                 differential privacy could be tuned to provide provable
                 privacy guarantees even in the correlated setting by
                 introducing an extra parameter, which measures the
                 extent of correlation. We subsequently provide a
                 holistic solution for non-interactive network data
                 publication. First, we generate a private vertex
                 labeling for a given network dataset to make the
                 corresponding adjacency matrix form dense clusters.
                 Next, we adaptively identify dense regions of the
                 adjacency matrix by a data-dependent partitioning
                 process. Finally, we reconstruct a noisy adjacency
                 matrix by a novel use of the exponential mechanism. To
                 the best of our knowledge, this is the first work
                 providing a practical solution for publishing
                 real-life network data via differential privacy.
                 Extensive experiments
                 demonstrate that our approach performs well on
                 different types of real-life network datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
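
%%% The reconstruction step above relies on the exponential mechanism.
%%% A minimal, self-contained sketch of that mechanism (illustrative;
%%% the scoring function and names are assumptions, not the paper's
%%% reconstruction procedure):
%%%
%%%    import math
%%%    import random
%%%
%%%    def exponential_mechanism(candidates, score, eps, sensitivity):
%%%        # Sample one candidate with probability proportional to
%%%        # exp(eps * score / (2 * sensitivity)); shifting scores by
%%%        # their maximum avoids overflow without changing the law.
%%%        scores = [score(c) for c in candidates]
%%%        m = max(scores)
%%%        weights = [math.exp(eps * (s - m) / (2.0 * sensitivity))
%%%                   for s in scores]
%%%        return random.choices(candidates, weights=weights, k=1)[0]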

@Article{Xiang:2014:AED,
  author =       "Yang Xiang",
  title =        "Answering exact distance queries on real-world graphs
                 with bounded performance guarantees",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "5",
  pages =        "677--695",
  month =        oct,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0338-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 24 08:05:09 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The ability to efficiently obtain exact distance
                 information from both directed and undirected graphs is
                 desired by many real-world applications. In this work,
                 we unify the query indexing efforts on directed and
                 undirected graphs into one by proposing the TreeMap
                 approach. Our approach has very tight bounds on query
                 time, index size, and construction time for answering
                 queries on both directed and undirected graphs. The
                 query time complexity is close to constant for graphs
                 with a small width of tree decomposition, and the index
                 construction can be completed without materializing the
                 distance matrix or other high-cost operations. In the
                 empirical study, we demonstrate that the TreeMap
                 approach in general performs much better than
                 competitive methods in indexing real graphs for
                 answering exact distance queries.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yao:2014:DMO,
  author =       "Bin Yao and Xiaokui Xiao and Feifei Li and Yifan Wu",
  title =        "Dynamic monitoring of optimal locations in road
                 network databases",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "5",
  pages =        "697--720",
  month =        oct,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0347-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 24 08:05:09 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Optimal location (OL) queries are a type of spatial
                 queries that are particularly useful for the strategic
                 planning of resources. Given a set of existing
                 facilities and a set of clients, an OL query asks for a
                 location to build a new facility that optimizes a
                 certain cost metric (defined based on the distances
                 between the clients and the facilities). Several
                 techniques have been proposed to address OL queries,
                 assuming that all clients and facilities reside in an $
                 L_p $ space. In practice, however, movements between
                 spatial locations are usually confined by the
                 underlying road network, and hence, the actual distance
                 between two locations can differ significantly from
                 their $ L_p $ distance. Motivated by the deficiency of
                 the existing techniques, this paper presents a
                 comprehensive study on OL queries in road networks. We
                 propose a unified framework that addresses three
                 variants of OL queries that find important applications
                 in practice, and we instantiate the framework with
                 several novel query processing algorithms. We further
                 extend our framework to efficiently monitor the OLs
                 when locations for facilities and/or clients have been
                 updated. Our dynamic update methods lead to efficient
                 answering of continuous optimal location queries. We
                 demonstrate the efficiency of our solutions through
                 extensive experiments with large real data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tran:2014:QRE,
  author =       "Quoc Trung Tran and Chee-Yong Chan and Srinivasan
                 Parthasarathy",
  title =        "Query reverse engineering",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "5",
  pages =        "721--746",
  month =        oct,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0349-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 24 08:05:09 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we introduce a new problem termed query
                 reverse engineering (QRE). Given a database $D$ and a
                 result table $T$ --- the output of some known or
                 unknown query $Q$ on $D$ --- the goal of QRE is to
                 reverse-engineer a query $ Q'$ such that the output of
                 query $ Q'$ on database $D$ (denoted by $ Q'(D)$) is
                 equal to $T$ (i.e., $ Q(D)$). The QRE problem has useful
                 applications in database usability, data analysis, and
                 data security. In this work, we propose a data-driven
                 approach, TALOS ({\bf T}ree-based classifier with
                 {\bf A}t {\bf L}east {\bf O}ne {\bf S}emantics), which
                 is based on a novel dynamic data classification
                 formulation, and extend the approach to efficiently
                 support the three key dimensions of the QRE problem:
                 whether the input query is known\slash unknown,
                 supporting different query fragments, and supporting
                 multiple database versions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Martinenghi:2014:TBR,
  author =       "Davide Martinenghi and Riccardo Torlone",
  title =        "Taxonomy-based relaxation of query answering in
                 relational databases",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "5",
  pages =        "747--769",
  month =        oct,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-013-0350-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 24 08:05:09 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Traditional information search in which queries are
                 posed against a known and rigid schema over a
                 structured database is shifting toward a Web scenario
                 in which exposed schemas are vague or absent and data
                 come from heterogeneous sources. In this framework,
                 query answering cannot be precise and needs to be
                 relaxed, with the goal of matching user requests with
                 accessible data. In this paper, we propose a logical
                 model and a class of abstract query languages as a
                 foundation for querying relational data sets with vague
                 schemas. Our approach relies on the availability of
                 taxonomies, that is, simple classifications of terms
                 arranged in a hierarchical structure. The model is a
                 natural extension of the relational model in which data
                 domains are organized in hierarchies, according to
                 different levels of generalization between terms. We
                 first propose a conservative extension of the
                 relational algebra for this model in which special
                 operators allow the specification of relaxed queries
                 over vaguely structured information. We also study
                 equivalence and rewriting properties of the algebra
                 that can be used for query optimization. We then
                 illustrate a logic-based query language that can
                 provide a basis for expressing relaxed queries in a
                 declarative way. We finally investigate the expressive
                 power of the proposed query languages and the
                 independence of the taxonomy in this context.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Soria-Comas:2014:EDU,
  author =       "Jordi Soria-Comas and Josep Domingo-Ferrer and David
                 S{\'a}nchez and Sergio Mart{\'\i}nez",
  title =        "Enhancing data utility in differential privacy via
                 microaggregation-based $k$-anonymity",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "5",
  pages =        "771--794",
  month =        oct,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0351-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 24 08:05:09 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "It is not uncommon in the data anonymization
                 literature to oppose the ``old'' $k$-anonymity model to
                 the ``new'' differential privacy model, which offers
                 more robust privacy guarantees. Yet, it is often
                 disregarded that the utility of the anonymized results
                 provided by differential privacy is quite limited, due
                 to the amount of noise that needs to be added to the
                 output, or because utility can only be guaranteed for a
                 restricted type of queries. This is in contrast with
                 $k$-anonymity mechanisms, which make no assumptions on
                 the uses of anonymized data while focusing on
                 preserving data utility from a general perspective. In
                 this paper, we show that a synergy between differential
                 privacy and $k$-anonymity can be found: $k$-anonymity
                 can help improve the utility of differentially
                 private responses to arbitrary queries. We devote
                 special attention to the utility improvement of
                 differentially private published data sets.
                 Specifically, we show that the amount of noise required
                 to fulfill $ \varepsilon $-differential privacy can be
                 reduced if noise is added to a $k$-anonymous version of
                 the data set, where $k$-anonymity is reached through a
                 specially designed microaggregation of all attributes.
                 As a result of noise reduction, the general analytical
                 utility of the anonymized output is increased. The
                 theoretical benefits of our proposal are illustrated in
                 a practical setting with an empirical evaluation on
                 three data sets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
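
%%% A minimal sketch of the pipeline the abstract describes, under
%%% simplifying assumptions (one numeric attribute; fixed-size groups;
%%% the paper's multivariate microaggregation and its sensitivity
%%% analysis are more refined): microaggregate to a $k$-anonymous
%%% version first, then add Laplace noise calibrated to $\varepsilon$.
%%%
%%%    import numpy as np
%%%
%%%    def microaggregate(values, k):
%%%        # Replace each run of k consecutive sorted values by its
%%%        # mean, yielding a k-anonymous version of the attribute.
%%%        v = np.sort(np.asarray(values, dtype=float))
%%%        for i in range(0, len(v), k):
%%%            v[i:i + k] = v[i:i + k].mean()
%%%        return v
%%%
%%%    def dp_release(values, k, eps, sensitivity):
%%%        # Standard Laplace mechanism, scale = sensitivity / eps;
%%%        # the point of the paper is that microaggregating first
%%%        # can shrink the sensitivity, hence the required noise.
%%%        noise = np.random.laplace(0.0, sensitivity / eps,
%%%                                  size=len(values))
%%%        return microaggregate(values, k) + noise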

@Article{Magnani:2014:TBP,
  author =       "Matteo Magnani and Ira Assent and Michael L.
                 Mortensen",
  title =        "Taking the {Big Picture}: representative skylines
                 based on significance and diversity",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "5",
  pages =        "795--815",
  month =        oct,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0352-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 24 08:05:09 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The skyline is a popular operator to extract records
                 from a database when a record scoring function is not
                 available. However, the result of a skyline query can
                 be very large. The problem addressed in this paper is
                 the automatic selection of a small number ($k$) of
                 representative skyline records. Existing approaches
                 have only focused on partial aspects of this problem.
                 Some try to identify sets of diverse records giving an
                 overall approximation of the skyline. These techniques,
                 however, are sensitive to the scaling of attributes or
                 to the insertion of non-skyline records into the
                 database. Others exploit some knowledge of the record
                 scoring function to identify the most significant
                 record, but not sets of records representative of the
                 whole skyline. In this paper, we introduce a novel
                 approach taking both the significance of all the
                 records and their diversity into account, adapting to
                 available knowledge of the scoring function, but also
                 working under complete ignorance. We show the
                 intractability of the problem and present approximate
                 algorithms. We experimentally show that our approach is
                 efficient and scalable and that it improves on existing
                 work in terms of the significance and diversity of the
                 results.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
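
%%% For concreteness, a naive baseline in Python (illustrative only:
%%% it covers the diversity half of the paper's proposal and ignores
%%% record significance): compute the skyline under "smaller is
%%% better" on every attribute, then pick $k$ representatives with the
%%% farthest-point heuristic.
%%%
%%%    def dominates(a, b):
%%%        return (all(x <= y for x, y in zip(a, b))
%%%                and any(x < y for x, y in zip(a, b)))
%%%
%%%    def skyline(records):
%%%        return [r for r in records
%%%                if not any(dominates(o, r)
%%%                           for o in records if o is not r)]
%%%
%%%    def representatives(sky, k):
%%%        if not sky:
%%%            return []
%%%        reps = [sky[0]]
%%%        while len(reps) < min(k, len(sky)):
%%%            # Add the skyline record farthest from its nearest
%%%            # already-chosen representative (max-min diversity).
%%%            reps.append(max(sky, key=lambda r: min(
%%%                sum((x - y) ** 2 for x, y in zip(r, s))
%%%                for s in reps)))
%%%        return reps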

@Article{Sidlauskas:2014:PEM,
  author =       "Darius {\v{S}}idlauskas and Simonas {\v{S}}altenis and
                 Christian S. Jensen",
  title =        "Processing of extreme moving-object update and query
                 workloads in main memory",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "5",
  pages =        "817--841",
  month =        oct,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0353-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Sep 24 08:05:09 MDT 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The efficient processing of workloads that interleave
                 moving-object updates and queries is challenging. In
                 addition to the conflicting needs for update-efficient
                 versus query-efficient data structures, the increasing
                 parallel capabilities of multi-core processors yield
                 challenges. To prevent concurrency anomalies and to
                 ensure correct system behavior, conflicting update and
                 query operations must be serialized. In this setting,
                 it is a key concern to avoid that operations are
                 blocked, which leaves processing cores idle. To enable
                 efficient processing, we first examine concurrency
                 degrees from traditional transaction processing in the
                 context of our target domain and propose new semantics
                 that enable a high degree of parallelism and ensure
                 up-to-date query results. We define the new semantics
                 for range and $k$-nearest neighbor queries. Then, we
                 present a main-memory indexing technique called
                 parallel grid that implements the proposed semantics as
                 well as two other variants supporting different
                 semantics. This enables us to quantify the effects that
                 different degrees of consistency have on performance.
                 We also present an alternative time-partitioning
                 approach. Empirical studies with the above and three
                 existing proposals conducted on modern processors show
                 that our proposals scale near-linearly with the number
                 of hardware threads and thus are able to benefit from
                 increasing on-chip parallelism.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Aboulnaga:2014:SSD,
  author =       "Ashraf Aboulnaga and Beng Chin Ooi and Patrick
                 Valduriez",
  title =        "Special section on data-intensive cloud
                 infrastructure",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "6",
  pages =        "843--843",
  month =        dec,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0371-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Nov 24 15:31:08 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kumar:2014:SWA,
  author =       "K. Ashwin Kumar and Abdul Quamar and Amol Deshpande
                 and Samir Khuller",
  title =        "{SWORD}: workload-aware data placement and replica
                 selection for cloud data management systems",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "6",
  pages =        "845--870",
  month =        dec,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0362-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Nov 24 15:31:08 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Cloud computing is increasingly being seen as a way to
                 reduce infrastructure costs and add elasticity, and is
                 being used by a wide range of organizations. Cloud data
                 management systems today need to serve a range of
                 different workloads, from analytical read-heavy
                 workloads to transactional (OLTP) workloads. For both
                 the service providers and the users, it is critical to
                 minimize the consumption of resources like CPU, memory,
                 communication bandwidth, and energy, without
                 compromising on service-level agreements if any. In
                 this article, we develop a workload-aware data
                 placement and replication approach, called SWORD, for
                 minimizing resource consumption in such an environment.
                 Specifically, we monitor and model the expected
                 workload as a hypergraph and develop partitioning
                 techniques that minimize the average query span, i.e.,
                 the average number of machines involved in the
                 execution of a query or a transaction. We empirically
                 justify the use of query span as the metric to
                 optimize, for both analytical and transactional
                 workloads, and develop a series of replication and data
                 placement algorithms by drawing connections to several
                 well-studied graph theoretic concepts. We introduce a
                 suite of novel techniques to achieve high scalability
                 by reducing the overhead of partitioning and query
                 routing. To deal with workload changes, we propose an
                 incremental repartitioning technique that modifies data
                 placement in small steps without resorting to complete
                 repartitioning. We propose the use of fine-grained
                 quorums defined at the level of groups of data items to
                 control the cost of distributed updates, improve
                 throughput, and adapt to different workloads. We
                 empirically illustrate the benefits of our approach
                 through a comprehensive experimental evaluation for two
                 classes of workloads. For analytical read-only
                 workloads, we show that our techniques result in
                 significant reduction in total resource consumption.
                 For OLTP workloads, we show that our approach improves
                 transaction latencies and overall throughput by
                 minimizing the number of distributed transactions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
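
%%% The objective SWORD minimizes is easy to state in code. A minimal
%%% sketch (illustrative; it ignores replication, where the span would
%%% be taken over the best replica of each item):
%%%
%%%    def average_query_span(placement, queries):
%%%        """placement: dict item -> machine;
%%%           queries: list of sets of items each query touches."""
%%%        spans = [len({placement[item] for item in q})
%%%                 for q in queries]
%%%        return sum(spans) / len(spans)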

@Article{Sahli:2014:ASP,
  author =       "Majed Sahli and Essam Mansour and Panos Kalnis",
  title =        "{ACME}: a scalable parallel system for extracting
                 frequent patterns from a very long sequence",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "6",
  pages =        "871--893",
  month =        dec,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0370-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Nov 24 15:31:08 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Modern applications, including bioinformatics, time
                 series, and web log analysis, require the extraction of
                 frequent patterns, called motifs, from one very long
                 (i.e., several gigabytes) sequence. Existing approaches
                 are either heuristics that are error-prone, or exact
                 (also called combinatorial) methods that are extremely
                 slow and, therefore, applicable only to very small
                 sequences (i.e., in the order of megabytes). This paper
                 presents ACME, a combinatorial approach that scales to
                 gigabyte-long sequences and is the first to support
                 supermaximal motifs. ACME is a versatile parallel
                 system that can be deployed on desktop multi-core
                 systems, or on thousands of CPUs in the cloud. However,
                 merely using more compute nodes does not guarantee
                 efficiency, because of the related overheads. To this
                 end, ACME introduces an automatic tuning mechanism that
                 suggests the appropriate number of CPUs to utilize, in
                 order to meet the user constraints in terms of run
                 time, while minimizing the financial cost of cloud
                 resources. Our experiments show that, compared to the
                 state of the art, ACME supports three orders of
                 magnitude longer sequences (e.g., DNA for the entire
                 human genome); handles large alphabets (e.g., English
                 alphabet for Wikipedia); scales out to 16,384 CPUs on a
                 supercomputer; and supports elastic deployment in the
                 cloud.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lo:2014:MGD,
  author =       "Eric Lo and Nick Cheng and Wilfred W. Lin and Wing-Kai
                 Hon and Byron Choi",
  title =        "{MyBenchmark}: generating databases for query
                 workloads",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "6",
  pages =        "895--913",
  month =        dec,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0354-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Nov 24 15:31:08 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "To evaluate the performance of database applications
                 and database management systems (DBMSs), we usually
                 execute workloads of queries on generated databases of
                 different sizes and then benchmark various measures
                 such as response time and throughput. This paper
                 introduces MyBenchmark, a parallel data generation tool
                 that takes a set of queries as input and generates
                 database instances. Users of MyBenchmark can control
                 the characteristics of the generated data as well as
                 the characteristics of the resulting workload.
                 Applications of MyBenchmark include DBMS testing,
                 database application testing, and application-driven
                 benchmarking. In this paper, we present the
                 architecture and the implementation algorithms of
                 MyBenchmark. Experimental results show that MyBenchmark
                 is able to generate workload-aware databases for a
                 variety of workloads including query workloads
                 extracted from TPC-C, TPC-E, TPC-H, and TPC-W
                 benchmarks.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Xie:2014:MEB,
  author =       "Qing Xie and Chaoyi Pang and Xiaofang Zhou and
                 Xiangliang Zhang and Ke Deng",
  title =        "Maximum error-bounded {Piecewise Linear
                 Representation} for online stream approximation",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "6",
  pages =        "915--937",
  month =        dec,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0355-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Nov 24 15:31:08 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a time series data stream, the generation of
                 error-bounded Piecewise Linear Representation
                 (error-bounded PLR) is to construct a number of
                 consecutive line segments to approximate the stream,
                 such that the approximation error does not exceed a
                 prescribed error bound. In this work, we consider the
                 error bound in the $L_\infty$ norm as the
                 approximation criterion, which constrains the
                 approximation error on each data point, and we aim to
                 design algorithms that generate the minimal number of
                 segments. In the literature, optimal approximation
                 algorithms are designed in a transformed space rather
                 than the time-value space, while optimal solutions
                 based on the original time domain (i.e., the
                 time-value space) are still lacking. In this article,
                 we propose two linear-time algorithms, named
                 OptimalPLR and GreedyPLR, to construct error-bounded
                 PLR for a data stream in the time domain. OptimalPLR
                 is an optimal algorithm that generates the minimal
                 number of line segments for the stream approximation;
                 GreedyPLR is an alternative for settings that demand
                 high efficiency or have constrained resources. To
                 establish the superiority of OptimalPLR, we
                 theoretically analyze and compare it with the
                 state-of-the-art optimal solution in the transformed
                 space, which also achieves linear complexity. We
                 prove the theoretical equivalence between the
                 time-value space and the transformed space and find
                 that OptimalPLR is more efficient in practice.
                 Extensive empirical results demonstrate the
                 effectiveness and efficiency of our proposed
                 algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
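
%%% A minimal greedy pass in the spirit of GreedyPLR (illustrative;
%%% not the paper's algorithm, and it anchors each segment at its
%%% first point, which the paper does not require): keep the cone of
%%% slopes that hold every point of the current segment within eps;
%%% when a point empties the cone, close the segment and restart.
%%% Each point is examined once, so the pass is linear time.
%%%
%%%    def greedy_plr(points, eps):
%%%        """points: list of (x, y) with strictly increasing x."""
%%%        INF = float("inf")
%%%        segments = []            # (x_start, y_start, slope, x_end)
%%%        (x0, y0), lo, hi = points[0], -INF, INF
%%%        x_prev = x0
%%%        for x, y in points[1:]:
%%%            s_lo = (y - eps - y0) / (x - x0)
%%%            s_hi = (y + eps - y0) / (x - x0)
%%%            if max(lo, s_lo) > min(hi, s_hi):   # cone is empty
%%%                slope = 0.0 if lo == -INF else (lo + hi) / 2.0
%%%                segments.append((x0, y0, slope, x_prev))
%%%                (x0, y0), lo, hi = (x, y), -INF, INF
%%%            else:
%%%                lo, hi = max(lo, s_lo), min(hi, s_hi)
%%%            x_prev = x
%%%        slope = 0.0 if lo == -INF else (lo + hi) / 2.0
%%%        segments.append((x0, y0, slope, points[-1][0]))
%%%        return segments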

@Article{Alexandrov:2014:SPB,
  author =       "Alexander Alexandrov and Rico Bergmann and Stephan
                 Ewen and Johann-Christoph Freytag and Fabian Hueske and
                 Arvid Heise and Odej Kao and Marcus Leich and Ulf Leser
                 and Volker Markl and Felix Naumann and Mathias Peters
                 and Astrid Rheinl{\"a}nder and Matthias J. Sax and
                 Sebastian Schelter and Mareike H{\"o}ger and Kostas
                 Tzoumas and Daniel Warneke",
  title =        "The {Stratosphere} platform for big data analytics",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "6",
  pages =        "939--964",
  month =        dec,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0357-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Nov 24 15:31:08 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We present Stratosphere, an open-source software stack
                 for parallel data analysis. Stratosphere brings
                 together a unique set of features that allow the
                 expressive, easy, and efficient programming of
                 analytical applications at very large scale.
                 Stratosphere's features include ``in situ'' data
                 processing, a declarative query language, treatment of
                 user-defined functions as first-class citizens,
                 automatic program parallelization and optimization,
                 support for iterative programs, and a scalable and
                 efficient execution engine. Stratosphere covers a
                 variety of ``Big Data'' use cases, such as data
                 warehousing, information extraction and integration,
                 data cleansing, graph analysis, and statistical
                 analysis applications. In this paper, we present the
                 overall system architecture and design decisions,
                 introduce Stratosphere through example queries, and
                 then dive
                 into the internal workings of the system's components
                 that relate to extensibility, programming model,
                 optimization, and query execution. We experimentally
                 compare Stratosphere against popular open-source
                 alternatives, and we conclude with a research outlook
                 for the next years.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ward:2014:RTC,
  author =       "Phillip G. Ward and Zhen He and Rui Zhang and
                 Jianzhong Qi",
  title =        "Real-time continuous intersection joins over large
                 sets of moving objects using graphic processing units",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "6",
  pages =        "965--985",
  month =        dec,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0358-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Nov 24 15:31:08 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The Multiple Time Bucket Join (MTB-join) algorithm is
                 the state of the art for processing the continuous
                 intersection join (CI-join) query over moving objects.
                 It considerably outperforms alternatives, but still
                 falls short of real-time application performance
                 requirements for large sets of moving objects. In this
                 paper, we achieve real-time performance for the CI-join
                 query over large sets of moving objects by exploiting
                 the computational power of commodity graphics
                 processing units (GPUs). We first analyze how the main
                 characteristics of the MTB-join algorithm make it ill
                 suited to GPUs and identify key challenges in designing
                 efficient GPU-based algorithms for the query. We then
                 address these challenges by developing the
                 multi-layered grid join (MLG-join) algorithm which has
                 the following key features: (i) memory locality
                 friendly indexing, (ii) no dynamic memory allocation,
                 (iii) in-place object updates, (iv) lock-free
                 concurrent updates, and (v) massive parallelism. These
                 features unleash the full potential of the memory
                 bandwidth and parallel processing of GPUs. Furthermore,
                 we conduct a theoretical analysis which can predict the
                 pruning power of the MLG-join algorithm given certain
                 parameter values used in the algorithm. This allows us
                 to select optimal parameter values. Through extensive
                 experimental results, we show that our analysis
                 accurately models the MLG-join algorithm's sensitivity
                 to parameter values. The proposed MLG-join algorithm
                 outperforms the MTB-join algorithm, and a GPU-based
                 nested-loops join algorithm, by up to two orders of
                 magnitude, and achieves real-time performance for
                 CI-join queries on large sets of moving objects.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
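
%%% For intuition, a sequential uniform-grid intersection join (a toy
%%% CPU baseline; the MLG-join adds multiple layers, lock-free updates
%%% and GPU parallelism on top of this idea): hash each rectangle to
%%% the cells it overlaps and test only pairs sharing a cell.
%%%
%%%    from collections import defaultdict
%%%
%%%    def grid_join(boxes_a, boxes_b, cell):
%%%        """boxes: (xmin, ymin, xmax, ymax); cell: grid cell size."""
%%%        def cells(x1, y1, x2, y2):
%%%            for cx in range(int(x1 // cell), int(x2 // cell) + 1):
%%%                for cy in range(int(y1 // cell), int(y2 // cell) + 1):
%%%                    yield cx, cy
%%%        grid = defaultdict(list)
%%%        for i, box in enumerate(boxes_a):
%%%            for c in cells(*box):
%%%                grid[c].append(i)
%%%        out = set()               # set removes duplicate pairs
%%%        for j, (x1, y1, x2, y2) in enumerate(boxes_b):
%%%            for c in cells(x1, y1, x2, y2):
%%%                for i in grid.get(c, ()):
%%%                    a = boxes_a[i]
%%%                    if (a[0] <= x2 and x1 <= a[2] and
%%%                            a[1] <= y2 and y1 <= a[3]):
%%%                        out.add((i, j))
%%%        return out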

@Article{Binnig:2014:DSI,
  author =       "Carsten Binnig and Stefan Hildenbrand and Franz
                 F{\"a}rber and Donald Kossmann and Juchang Lee and
                 Norman May",
  title =        "Distributed snapshot isolation: global transactions
                 pay globally, local transactions pay locally",
  journal =      j-VLDB-J,
  volume =       "23",
  number =       "6",
  pages =        "987--1011",
  month =        dec,
  year =         "2014",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0359-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Nov 24 15:31:08 MST 2014",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Modern database systems employ Snapshot Isolation to
                 implement concurrency control and isolation because it
                 promises superior query performance compared to
                 lock-based alternatives. Furthermore, Snapshot
                 Isolation never blocks readers, which is an important
                 property for modern information systems, which have
                 mixed workloads of heavy OLAP queries and short update
                 transactions. This paper revisits the problem of
                 implementing Snapshot Isolation in a distributed
                 database system and makes three important
                 contributions. First, a complete definition of
                 Distributed Snapshot Isolation is given, thereby
                 extending existing definitions from the literature.
                 Based on this definition, a set of criteria is proposed
                 to efficiently implement Snapshot Isolation in a
                 distributed system. Second, the design space of
                 alternative methods to implement Distributed Snapshot
                 Isolation is presented based on this set of criteria.
                 Third, a new approach to implement Distributed Snapshot
                 Isolation is devised; we refer to this approach as
                 Incremental. The results of comprehensive performance
                 experiments with the TPC-C benchmark show that the
                 Incremental approach significantly outperforms any
                 other known method from the literature. Furthermore,
                 the Incremental approach requires no a priori knowledge
                 of which nodes of a distributed system are involved in
                 executing a transaction. Also, the Incremental approach
                 can execute transactions that involve data from a
                 single node only with the same efficiency as a
                 centralized database system. This way, the Incremental
                 approach takes advantage of sharding or other ways to
                 improve data locality. The cost for synchronizing
                 transactions in a distributed system is only paid by
                 transactions that actually involve data from several
                 nodes. All these properties make the Incremental
                 approach more practical than related methods proposed
                 in the literature.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
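
%%% A minimal illustrative sketch (Python) of the core Snapshot Isolation
%%% rules that any distributed variant must preserve: readers see only
%%% versions committed before their snapshot and are never blocked, and
%%% write-write conflicts abort (first-committer-wins). This is not the
%%% paper's Incremental protocol; all names here are hypothetical.
%%%
%%%   class SIStore:
%%%       def __init__(self):
%%%           self.versions = {}  # key -> list of (commit_ts, value)
%%%           self.clock = 0      # logical commit-timestamp counter
%%%
%%%       def begin(self):
%%%           return self.clock   # snapshot timestamp of a new transaction
%%%
%%%       def read(self, snapshot_ts, key):
%%%           # newest version committed at or before the snapshot
%%%           visible = [(ts, v) for ts, v in self.versions.get(key, [])
%%%                      if ts <= snapshot_ts]
%%%           return max(visible)[1] if visible else None
%%%
%%%       def commit(self, snapshot_ts, writes):
%%%           # abort if any written key gained a version after the snapshot
%%%           for key in writes:
%%%               if any(ts > snapshot_ts
%%%                      for ts, _ in self.versions.get(key, [])):
%%%                   return False
%%%           self.clock += 1
%%%           for key, value in writes.items():
%%%               self.versions.setdefault(key, []).append(
%%%                   (self.clock, value))
%%%           return True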

@Article{Vlachos:2015:CMF,
  author =       "Michail Vlachos and Nikolaos M. Freris and Anastasios
                 Kyrillidis",
  title =        "Compressive mining: fast and optimal data mining in
                 the compressed domain",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "1",
  pages =        "1--24",
  month =        feb,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0360-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 6 15:25:03 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Real-world data typically contain repeated and
                 periodic patterns. This suggests that they can be
                 effectively represented and compressed using only a few
                 coefficients of an appropriate basis (e.g., Fourier and
                 wavelets). However, distance estimation when the data
                 are represented using different sets of coefficients is
                 still a largely unexplored area. This work studies the
                 optimization problems related to obtaining the tightest
                 lower/upper bound on Euclidean distances when each data
                 object is potentially compressed using a different set
                 of orthonormal coefficients. Our technique leads to
                 tighter distance estimates, which translates into more
                 accurate search, learning and mining operations
                 directly in the compressed domain. We formulate the
                 problem of estimating lower/upper distance bounds as an
                 optimization problem. We establish the properties of
                 optimal solutions and leverage the theoretical analysis
                 to develop a fast algorithm to obtain an exact solution
                 to the problem. The suggested solution provides the
                 tightest estimation of the $ L_2$-norm or the
                 correlation. We show that typical data analysis
                 operations, such as $k$-nearest-neighbor search or
                 $k$-Means clustering, can operate more accurately using
                 the proposed compression and distance reconstruction
                 technique. We compare it with many other prevalent
                 compression and reconstruction techniques, including
                 random projections and PCA-based techniques. We
                 highlight a surprising result, namely that when the
                 data are highly sparse in some basis, our technique may
                 even outperform PCA-based compression. The
                 contributions of this work are generic as our
                 methodology is applicable to any sequential or
                 high-dimensional data as well as to any orthogonal data
                 transformation used for the underlying data compression
                 scheme.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
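
%%% A baseline sketch (Python) of the bound that the paper tightens: for an
%%% orthonormal transform, Parseval's theorem preserves Euclidean distance,
%%% so the coefficients kept by *both* objects give a valid lower bound on
%%% the true L2 distance (dropping the unshared positions only removes
%%% nonnegative terms). The paper's optimization additionally exploits the
%%% discarded energy of each object; the function name is hypothetical.
%%%
%%%   from math import sqrt
%%%
%%%   def l2_lower_bound(kept_x, kept_y):
%%%       """kept_x, kept_y: dicts mapping coefficient position -> value,
%%%       possibly over different position sets."""
%%%       common = set(kept_x) & set(kept_y)
%%%       return sqrt(sum((kept_x[i] - kept_y[i]) ** 2 for i in common))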

@Article{Sistla:2015:CNN,
  author =       "A. Prasad Sistla and Ouri Wolfson and Bo Xu",
  title =        "Continuous nearest-neighbor queries with location
                 uncertainty",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "1",
  pages =        "25--50",
  month =        feb,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0361-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 6 15:25:03 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we consider the problem of evaluating
                 the continuous query of finding the $k$ nearest
                 objects with respect to a given point object $ O_q$
                 among a set of $n$ moving point-objects. The query returns a
                 sequence of answer-pairs, namely pairs of the form $
                 (I, S) $ such that $I$ is a time interval and $S$ is
                 the set of objects that are closest to $ O_q$ during
                 $I$. When there is uncertainty associated with the
                 locations of the moving objects, $S$ is the set of all
                 the objects that are possibly the $k$ nearest
                 neighbors. We analyze the lower bound and the upper
                 bound on the maximum number of answer-pairs, for the
                 certain case and the uncertain case, respectively.
                 Then, we consider two different types of algorithms.
                 The first type consists of off-line algorithms that
                 compute all the answer-pairs a priori. The second type
                 consists of on-line algorithms that, at any time,
                 return the current answer-pair. We present algorithms
                 for the certain case
                 and the uncertain case, respectively, and analyze their
                 complexity. We experimentally compare different
                 algorithms using a database of 1 million objects
                 derived from real-world GPS traces.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gur:2015:SFA,
  author =       "Izzeddin G{\"u}r and Mehmet G{\"u}vercin and Hakan
                 Ferhatosmanoglu",
  title =        "Scaling forecasting algorithms using clustered
                 modeling",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "1",
  pages =        "51--65",
  month =        feb,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0363-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 6 15:25:03 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Research on forecasting has traditionally focused on
                 building more accurate statistical models for a given
                 time series. The models are mostly applied to limited
                 data due to efficiency and scalability problems.
                 However, many enterprise applications require scalable
                 forecasting on a large number of data series. For
                 example, telecommunication companies need to forecast
                 each of their customers' traffic load to understand
                 their usage behavior and to tailor targeted campaigns.
                 Forecasting models are typically applied on aggregate
                 data to estimate the total traffic volume for revenue
                 estimation and resource planning. However, they cannot
                 be easily applied to each user individually, as
                 building accurate models for a large number of users
                 would be time-consuming. The problem is exacerbated when the
                 forecasting process is continuous and the models need
                 to be updated periodically. This paper addresses the
                 problem of building and updating forecasting models
                 continuously for multiple data series. We propose
                 dynamic clustered modeling for forecasting by utilizing
                 representative models as an analogy to cluster centers.
                 We apply the models to each individual series through
                 iterative nonlinear optimization. We develop two
                 approaches: The Integrated Clustered Modeling
                 integrates clustering and modeling simultaneously, and
                 the Sequential Clustered Modeling applies them
                 sequentially. Our findings indicate that modeling an
                 individual's behavior using its segment can be more
                 scalable and accurate than the individual model itself.
                 The grouped models avoid overfits and capture common
                 motifs even on noisy data. Experimental results from a
                 telco CRM application show that the method is
                 efficient and scalable, and also more accurate than
                 building separate individual models.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kaoudi:2015:RCS,
  author =       "Zoi Kaoudi and Ioana Manolescu",
  title =        "{RDF} in the clouds: a survey",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "1",
  pages =        "67--91",
  month =        feb,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0364-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 6 15:25:03 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The Resource Description Framework (RDF) pioneered by
                 the W3C is increasingly being adopted to model data in
                 a variety of scenarios, in particular data to be
                 published or exchanged on the Web. Managing large
                 volumes of RDF data is challenging, due to the sheer
                 size, the heterogeneity, and the further complexity
                 brought by RDF reasoning. To tackle the size challenge,
                 distributed storage architectures are required. Cloud
                 computing is an emerging paradigm massively adopted in
                 many applications for the scalability, fault-tolerance,
                 and elasticity features it provides, enabling the easy
                 deployment of distributed and parallel architectures.
                 In this article, we survey RDF data management
                 architectures and systems designed for a cloud
                 environment, and more generally, those large-scale RDF
                 data management systems that can be easily deployed
                 therein. We first give the necessary background, then
                 describe the existing systems and proposals in this
                 area, and classify them according to dimensions related
                 to their capabilities and implementation techniques.
                 The survey ends with a discussion of open problems and
                 perspectives.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Su:2015:CTD,
  author =       "Han Su and Kai Zheng and Jiamin Huang and Haozhou Wang
                 and Xiaofang Zhou",
  title =        "Calibrating trajectory data for spatio-temporal
                 similarity analysis",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "1",
  pages =        "93--116",
  month =        feb,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0365-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 6 15:25:03 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Due to the prevalence of GPS-enabled devices and
                 wireless communications technologies, spatial
                 trajectories that describe the movement history of
                 moving objects are being generated and accumulated at
                 an unprecedented pace. Trajectory data in a database
                 are intrinsically heterogeneous, as they represent
                 discrete approximations of original continuous paths
                 derived using different sampling strategies and
                 different sampling rates. Such heterogeneity can have a
                 negative impact on the effectiveness of trajectory
                 similarity measures, which are the basis of many
                 crucial trajectory processing tasks. In this paper, we
                 pioneer a systematic approach to trajectory calibration
                 that is a process to transform a heterogeneous
                 trajectory dataset to one with (almost) unified
                 sampling strategies. Specifically, we propose an
                 anchor-based calibration system that aligns
                 trajectories to a set of anchor points, which are fixed
                 locations independent of trajectory data. After
                 examining four different types of anchor points for the
                 purpose of building a stable reference system, we
                 propose a spatial-only geometry-based calibration
                 approach that considers the spatial relationship
                 between anchor points and trajectories. Then a more
                 advanced spatial-only model-based calibration method is
                 presented, which exploits the power of machine learning
                 techniques to train inference models from historical
                 trajectory data to improve calibration effectiveness.
                 Afterward, since trajectories carry temporal
                 information, we extend these two spatial-only
                 calibration algorithms to incorporate it, so that a
                 proper time stamp can be inferred for each anchor
                 point of a calibrated trajectory. Last, we provide a
                 solution that reduces the cost of updating the
                 reference system, i.e., the number of trajectories
                 that need to be re-calibrated. Finally, we
                 conduct extensive experiments using real trajectory
                 datasets to demonstrate the effectiveness and
                 efficiency of the proposed calibration system.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
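
%%% A toy sketch (Python) of the geometry-based flavor of anchor
%%% calibration described above: snap each raw point to its nearest anchor
%%% and collapse consecutive duplicates. The paper's system is far more
%%% sophisticated (model-based inference, temporal extension); the names
%%% and the dist argument are hypothetical.
%%%
%%%   def calibrate(traj, anchors, dist):
%%%       """traj: list of raw points; anchors: fixed reference locations;
%%%       dist: a distance function. Returns the anchor sequence."""
%%%       out = []
%%%       for p in traj:
%%%           nearest = min(anchors, key=lambda a: dist(a, p))
%%%           if not out or out[-1] != nearest:
%%%               out.append(nearest)
%%%       return out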

@Article{Li:2015:CAI,
  author =       "Hui Li and Sourav S. Bhowmick and Aixin Sun and
                 Jiangtao Cui",
  title =        "Conformity-aware influence maximization in online
                 social networks",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "1",
  pages =        "117--141",
  month =        feb,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0366-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 6 15:25:03 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Influence maximization (im) is the problem of finding
                 a small subset of nodes (seed nodes) in a social
                 network that could maximize the spread of influence.
                 Despite the progress achieved by state-of-the-art
                 greedy im techniques, they suffer from two key
                 limitations. Firstly, they are inefficient as they can
                 take days to find seeds in very large real-world
                 networks. Secondly, although extensive research in
                 social psychology suggests that humans will readily
                 conform to the wishes or beliefs of others,
                 surprisingly, existing im techniques are
                 conformity-unaware. That is, they only utilize an
                 individual's ability to influence another but ignores
                 conformity (a person's inclination to be influenced) of
                 the individuals. In this paper, we propose a novel
                 conformity-aware cascade ($ C^2$) model which leverages
                 on the interplay between influence and conformity in
                 obtaining the influence probabilities of nodes from
                 underlying data for estimating influence spreads. We
                 also propose a variant of this model called $ C^3$
                 model that supports context-specific influence and
                 conformity of nodes. A salient feature of these models
                 is that they are aligned to the popular social forces
                 principle in social psychology. Based on these models,
                 we propose a novel greedy algorithm called cinema that
                 generates high-quality seed set for the im problem. It
                 first partitions, the network into a set of
                 non-overlapping subnetworks and for each of these
                 subnetworks it computes the influence and conformity
                 indices of nodes by analyzing the sentiments expressed
                 by individuals. Each subnetwork is then associated with
                 a cog-sublist which stores the marginal gains of the
                 nodes in the subnetwork in descending order. The node
                 with maximum marginal gain in each cog-sublist is
                 stored in a data structure called mag-list. These
                 structures are manipulated by cinema to efficiently
                 find the seed set. A key feature of such
                 partitioning-based strategy is that each node's
                 influence computation and updates can be limited to the
                 subnetwork it resides instead of the entire network.
                 This paves way for seamless adoption of cinema on a
                 distributed platform. Our empirical study with
                 real-world social networks comprising of millions of
                 nodes demonstrates that cinema as well as its
                 context-aware and distributed variants generate
                 superior quality seed set compared to state-of-the-art
                 im approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
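
%%% The classic greedy seed-selection scheme (Python) that CINEMA-style
%%% algorithms accelerate: repeatedly add the node with the largest
%%% marginal gain in expected spread. The spread function (e.g., a
%%% Monte-Carlo cascade simulation under the $C^2$ model) is left abstract;
%%% this is the generic baseline, not the paper's partitioned algorithm.
%%%
%%%   def greedy_seed_selection(nodes, spread, k):
%%%       """nodes: iterable of node ids; spread: maps a seed set to its
%%%       expected influence; k: seed budget."""
%%%       seeds = set()
%%%       for _ in range(k):
%%%           best = max((n for n in nodes if n not in seeds),
%%%                      key=lambda n: spread(seeds | {n}) - spread(seeds))
%%%           seeds.add(best)
%%%       return seeds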

@Article{Deng:2015:UFA,
  author =       "Dong Deng and Guoliang Li and Jianhua Feng and Yi Duan
                 and Zhiguo Gong",
  title =        "A unified framework for approximate dictionary-based
                 entity extraction",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "1",
  pages =        "143--167",
  month =        feb,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0367-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 6 15:25:03 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Dictionary-based entity extraction identifies
                 predefined entities (e.g., person names or locations)
                 from documents. A recent trend for improving extraction
                 recall is to support approximate entity extraction,
                 which finds all substrings from documents that
                 approximately match entities in a given dictionary.
                 Existing methods to address this problem support either
                 token-based similarity (e.g., Jaccard Similarity) or
                 character-based dissimilarity (e.g., edit distance).
                 This calls for a unified method that supports various
                 similarity/dissimilarity functions, since a unified
                 method can reduce programming effort, hardware
                 requirements, and manpower. In this paper, we propose
                 a unified framework to support various
                 similarity/dissimilarity functions, such as Jaccard
                 similarity, cosine similarity, Dice similarity, edit
                 similarity, and edit distance. Since many real-world
                 applications have high-performance requirements for
                 approximate entity extraction on data streams (e.g.,
                 Twitter), we focus on devising efficient algorithms to
                 achieve high performance. We find that many substrings
                 in documents have overlaps, and we can utilize the
                 shared computation across the overlaps to avoid
                 unnecessary redundant computation. To this end, we
                 propose efficient filtering algorithms and develop
                 effective pruning techniques. Experimental results show
                 our method achieves high performance and outperforms
                 state-of-the-art studies significantly.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
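
%%% A brute-force baseline (Python) for approximate dictionary-based entity
%%% extraction under a token-based similarity: test every token window of
%%% the document against every entity. The paper's contribution is to
%%% avoid exactly this redundancy by sharing computation across the
%%% overlapping windows; all names here are hypothetical.
%%%
%%%   def jaccard(a, b):
%%%       a, b = set(a), set(b)
%%%       return len(a & b) / len(a | b) if a | b else 1.0
%%%
%%%   def extract(doc_tokens, dictionary, threshold):
%%%       """dictionary: list of entity token lists.
%%%       Yields (start, end, entity) for approximate matches."""
%%%       for i in range(len(doc_tokens)):
%%%           for j in range(i + 1, len(doc_tokens) + 1):
%%%               window = doc_tokens[i:j]
%%%               for entity in dictionary:
%%%                   if jaccard(window, entity) >= threshold:
%%%                       yield (i, j, entity)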

@Article{Hung:2015:CAC,
  author =       "Chih-Chieh Hung and Wen-Chih Peng and Wang-Chien Lee",
  title =        "Clustering and aggregating clues of trajectories for
                 mining trajectory patterns and routes",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "2",
  pages =        "169--192",
  month =        apr,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-011-0262-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Mar 18 19:14:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we propose a new trajectory pattern
                 mining framework, namely Clustering and Aggregating
                 Clues of Trajectories (CACT), for discovering
                 trajectory routes that represent the frequent movement
                 behaviors of a user. In addition to spatial and
                 temporal biases, we observe that trajectories contain
                 silent durations, i.e., the time durations when no data
                 points are available to describe the movements of
                 users, which bring many challenging issues to
                 trajectory pattern mining. We claim that a movement
                 behavior would leave some clues in its various
                 sampled/observed trajectories. These clues may be
                 extracted from spatially and temporally co-located data
                 points from the observed trajectories. Based on this
                 observation, we propose clue-aware trajectory
                 similarity to measure the clues between two
                 trajectories. Accordingly, we further propose the
                 clue-aware trajectory clustering algorithm to cluster
                 similar trajectories into groups to capture the
                 movement behaviors of the user. Finally, we devise the
                 clue-aware trajectory aggregation algorithm to
                 aggregate trajectories in the same group to derive the
                 corresponding trajectory pattern and route. We validate
                 our ideas and evaluate the proposed CACT framework by
                 experiments using both synthetic and real datasets. The
                 experimental results show that CACT is more effective
                 in discovering trajectory patterns than the
                 state-of-the-art techniques for mining trajectory
                 patterns.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Eichinger:2015:TSC,
  author =       "Frank Eichinger and Pavel Efros and Stamatis
                 Karnouskos and Klemens B{\"o}hm",
  title =        "A time-series compression technique and its
                 application to the smart grid",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "2",
  pages =        "193--218",
  month =        apr,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0368-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Mar 18 19:14:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Time-series data is increasingly collected in many
                 domains. One example is the smart electricity
                 infrastructure, which generates huge volumes of such
                 data from sources such as smart electricity meters.
                 Although today these data are used for visualization
                 and billing in mostly 15-min resolution, its original
                 temporal resolution frequently is more fine-grained,
                 e.g., seconds. This is useful for various analytical
                 applications such as short-term forecasting,
                 disaggregation and visualization. However, transmitting
                 and storing huge amounts of such fine-grained data are
                 prohibitively expensive in terms of storage space in
                 many cases. In this article, we present a compression
                 technique based on piecewise regression and two methods
                 which describe the performance of the compression.
                 Although our technique is a general approach for
                 time-series compression, smart grids serve as our
                 running example and as our evaluation scenario.
                 Depending on the data and the use-case scenario, the
                 technique compresses data by factors of up to 5,000
                 while maintaining its usefulness for analytics.
                 The proposed technique has outperformed related work
                 and has been applied to three real-world energy
                 datasets in different scenarios. Finally, we show that
                 the proposed compression technique can be implemented
                 in a state-of-the-art database management system.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
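
%%% A simple sketch (Python) of compression by piecewise linear regression
%%% under a maximum-error budget, i.e., the general idea behind the
%%% technique described above (the paper's method is more elaborate). All
%%% names are hypothetical.
%%%
%%%   def compress(series, max_err):
%%%       """Greedily grow each linear segment while the worst residual
%%%       stays within max_err; returns (start, end, slope, intercept)
%%%       tuples, from which the series can be approximately rebuilt."""
%%%       segments, start = [], 0
%%%       while start < len(series):
%%%           best, end = (start, start, 0.0, series[start]), start + 1
%%%           while end < len(series):
%%%               xs = list(range(start, end + 1))
%%%               n = len(xs)
%%%               mx = sum(xs) / n
%%%               my = sum(series[start:end + 1]) / n
%%%               sxx = sum((x - mx) ** 2 for x in xs)
%%%               slope = sum((x - mx) * (series[x] - my)
%%%                           for x in xs) / sxx
%%%               icept = my - slope * mx
%%%               err = max(abs(series[x] - (slope * x + icept))
%%%                         for x in xs)
%%%               if err > max_err:
%%%                   break
%%%               best, end = (start, end, slope, icept), end + 1
%%%           segments.append(best)
%%%           start = best[1] + 1
%%%       return segments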

@Article{Xue:2015:SDS,
  author =       "Andy Yuan Xue and Jianzhong Qi and Xing Xie and Rui
                 Zhang and Jin Huang and Yuan Li",
  title =        "Solving the data sparsity problem in destination
                 prediction",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "2",
  pages =        "219--243",
  month =        apr,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0369-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Mar 18 19:14:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Destination prediction is an essential task for many
                 emerging location-based applications such as
                 recommending sightseeing places and targeted
                 advertising according to destinations. A common
                 approach to destination prediction is to derive the
                 probability of a location being the destination based
                 on historical trajectories. However, almost all the
                 existing techniques use various kinds of extra
                 information such as road network, proprietary travel
                 planner, statistics requested from government, and
                 personal driving habits. Such extra information, in
                 most circumstances, is unavailable or very costly to
                 obtain. We therefore approach the task of destination
                 prediction using only a historical trajectory dataset.
                 However, this approach encounters the ``data sparsity
                 problem'', i.e., the available historical trajectories
                 are far from enough to cover all possible query
                 trajectories, which considerably limits the number of
                 query trajectories that can obtain predicted
                 destinations. We propose a novel method named
                 Sub-Trajectory Synthesis (SubSyn) to address the data
                 sparsity problem. SubSyn first decomposes historical
                 trajectories into sub-trajectories comprising two
                 adjacent locations, and then connects the
                 sub-trajectories into ``synthesised'' trajectories.
                 This process effectively expands the historical
                 trajectory dataset to contain many more trajectories.
                 Experiments based on real datasets show that SubSyn can
                 predict destinations for up to ten times more query
                 trajectories than a baseline prediction algorithm.
                 Furthermore, the running time of the SubSyn-training
                 algorithm is almost negligible for a large set of 1.9
                 million trajectories, and the SubSyn-prediction
                 algorithm consistently runs over two orders of
                 magnitude faster than the baseline prediction
                 algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
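
%%% A simplified sketch (Python) of the decomposition step: SubSyn's
%%% building blocks are sub-trajectories of two adjacent locations, which
%%% over a grid amount to first-order Markov transition counts; destination
%%% posteriors are then obtained by Bayesian reasoning over that model.
%%% This illustrates the idea only and is not the paper's algorithm.
%%%
%%%   from collections import Counter, defaultdict
%%%
%%%   def train(trajectories):
%%%       """trajectories: lists of grid cells. Returns adjacent-cell
%%%       transition counts and destination (last-cell) counts."""
%%%       trans, dest = defaultdict(Counter), Counter()
%%%       for t in trajectories:
%%%           for a, b in zip(t, t[1:]):
%%%               trans[a][b] += 1
%%%           dest[t[-1]] += 1
%%%       return trans, dest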

@Article{Zhang:2015:ECS,
  author =       "Zhiwei Zhang and Jeffrey Xu Yu and Lu Qin and Lijun
                 Chang and Xuemin Lin",
  title =        "{I/O} efficient: computing {SCCs} in massive graphs",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "2",
  pages =        "245--270",
  month =        apr,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0372-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Mar 18 19:14:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A strongly connected component ($\mathsf{SCC}$) is a
                 maximal subgraph of a directed graph GG in which every
                 pair of nodes is reachable from each other in the
                 $\mathsf{SCC}$. With such a property, a general
                 directed graph can be represented by a directed acyclic
                 graph (DAG) by contracting every $\mathsf{SCC}$ of GG
                 to a node in DAG. In many real applications that need
                 graph pattern matching, topological sorting, or
                 reachability query processing, the best way to deal
                 with a general directed graph is to deal with its DAG
                 representation. Therefore, finding all \mathsf
                 {SCC}SCCs in a directed graph GG is a critical
                 operation. The existing in-memory algorithms based on
                 depth first search (DFS) can find all $\mathsf{SCC}$s
                 in linear time with respect to the size of a graph.
                 However, when a graph cannot reside entirely in the
                 main memory, the existing external or semi-external
                 algorithms to find all $\mathsf{SCC}$s have
                 limitation to achieve high I/O efficiency. In this
                 paper, we study new I/O-efficient semi-external
                 algorithms to find all $\mathsf{SCC}$s for a massive
                 directed graph GG that cannot reside in main memory
                 entirely. To overcome the deficiency of the existing
                 DFS-based semi-external algorithm that heavily relies
                 on a total order, we explore a weak order based on
                 which we investigate new algorithms. We propose a new
                 two-phase algorithm, namely, tree construction and tree
                 search. In the tree construction phase, a spanning tree
                 of GG can be constructed in bounded number of
                 sequential scans of GG. In the tree search phase, it
                 needs to sequentially scan the graph once to find all
                 $\mathsf{SCC}$s. In addition, we propose a new
                 single-phase algorithm, which combines the tree
                 construction and tree search phases into a single
                 phase, with three new optimization techniques. They are
                 early acceptance, early rejection, and batch
                 processing. By the single-phase algorithm with the new
                 optimization techniques, we can significantly reduce
                 the number of I/Os and the CPU cost. We prove the
                 correctness of the algorithms. We conduct extensive
                 experimental studies using 4 real datasets including a
                 massive real dataset and several synthetic datasets to
                 confirm the I/O efficiency of our approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
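
%%% For contrast with the semi-external setting above, a compact sketch
%%% (Python) of the classic in-memory DFS-based SCC algorithm (Tarjan's):
%%% linear time, but it needs the whole graph in memory, which is exactly
%%% the limitation the paper addresses.
%%%
%%%   import sys
%%%
%%%   def tarjan_sccs(graph):
%%%       """graph: dict node -> list of successor nodes."""
%%%       sys.setrecursionlimit(1 << 20)
%%%       index, low, on_stack = {}, {}, set()
%%%       stack, sccs, counter = [], [], [0]
%%%
%%%       def dfs(v):
%%%           index[v] = low[v] = counter[0]; counter[0] += 1
%%%           stack.append(v); on_stack.add(v)
%%%           for w in graph.get(v, []):
%%%               if w not in index:
%%%                   dfs(w); low[v] = min(low[v], low[w])
%%%               elif w in on_stack:
%%%                   low[v] = min(low[v], index[w])
%%%           if low[v] == index[v]:      # v is the root of an SCC
%%%               scc = []
%%%               while True:
%%%                   w = stack.pop(); on_stack.discard(w); scc.append(w)
%%%                   if w == v:
%%%                       break
%%%               sccs.append(scc)
%%%
%%%       for v in list(graph):
%%%           if v not in index:
%%%               dfs(v)
%%%       return sccs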

@Article{Yuan:2015:GSS,
  author =       "Ye Yuan and Guoren Wang and Lei Chen and Haixun Wang",
  title =        "Graph similarity search on large uncertain graph
                 databases",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "2",
  pages =        "271--296",
  month =        apr,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0373-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Mar 18 19:14:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Many studies have been conducted on seeking an
                 efficient solution for graph similarity search over
                 certain (deterministic) graphs due to its wide
                 application in many fields, including bioinformatics,
                 social network analysis, and Resource Description
                 Framework data management. All prior work assumes that
                 the underlying data is deterministic. However, in
                 reality, graphs are often noisy and uncertain due to
                 various factors, such as errors in data extraction,
                 inconsistencies in data integration, and for
                 privacy-preserving purposes. Therefore, in this paper,
                 we study similarity graph containment search on large
                 uncertain graph databases. Similarity graph containment
                 search consists of subgraph similarity search and
                 supergraph similarity search. Different from previous
                 works assuming that edges in an uncertain graph are
                 independent of each other, we study uncertain graphs
                 where edges' occurrences are correlated. We formally
                 prove that subgraph or supergraph similarity search
                 over uncertain graphs is \#P-hard; thus, we employ a
                 filter-and-verify framework to speed up these two
                 queries. For the subgraph similarity query, in the
                 filtering phase, we develop tight lower and upper
                 bounds of subgraph similarity probability based on a
                 probabilistic matrix index (PMI). PMI is composed of
                 discriminative subgraph features associated with tight
                 lower and upper bounds of subgraph isomorphism
                 probability. Based on PMI, we can filter out a large
                 number of uncertain graphs and maximize the pruning
                 capability. During the verification phase, we develop
                 an efficient sampling algorithm to validate the
                 remaining candidates. For the supergraph similarity
                 query, in the filtering phase, we propose two pruning
                 algorithms, one lightweight and the other strong, based
                 on maximal common subgraphs of query graph and data
                 graph. We run the two pruning algorithms against a
                 probabilistic index that consists of powerful graph
                 features. In the verification phase, we design an
                 approximate algorithm based on the Horvitz--Thompson
                 estimator to quickly validate the remaining candidates. The
                 efficiencies of our proposed solutions to the subgraph
                 and supergraph similarity search have been verified
                 through extensive experiments on real uncertain graph
                 datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2015:TPC,
  author =       "Bin Yang and Chenjuan Guo and Yu Ma and Christian S.
                 Jensen",
  title =        "Toward personalized, context-aware routing",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "2",
  pages =        "297--318",
  month =        apr,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0378-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Mar 18 19:14:35 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A driver's choice of a route to a destination may
                 depend on the route's length and travel time, but a
                 multitude of other, possibly hard-to-formalize aspects,
                 may also factor into the driver's decision. There is
                 evidence that a driver's choice of route is context
                 dependent, e.g., varies across time, and that route
                 choice also varies from driver to driver. In contrast,
                 conventional routing services support little in the way
                 of context dependence, and they deliver the same routes
                 to all drivers. We study how to identify context-aware
                 driving preferences for individual drivers from
                 historical trajectories, and thus how to provide
                 foundations for personalized navigation, but also
                 professional driver education and traffic planning. We
                 provide techniques that are able to capture
                 time-dependent and uncertain properties of dynamic
                 travel costs, such as travel time and fuel consumption,
                 from trajectories, and we provide techniques capable of
                 capturing the driving behaviors of different drivers in
                 terms of multiple dynamic travel costs. Further, we
                 propose techniques that are able to identify a driver's
                 contexts and then to identify driving preferences for
                 each context using historical trajectories from the
                 driver. Empirical studies with a large trajectory data
                 set offer insight into the design properties of the
                 proposed techniques and suggest that they are
                 effective.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Huang:2015:TKS,
  author =       "Xin Huang and Hong Cheng and Rong-Hua Li and Lu Qin
                 and Jeffrey Xu Yu",
  title =        "Top-{$K$} structural diversity search in large
                 networks",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "3",
  pages =        "319--343",
  month =        jun,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0379-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 15 17:21:03 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Social contagion depicts a process of information
                 (e.g., fads, opinions, news) diffusion in the online
                 social networks. A recent study reports that in a
                 social contagion process, the probability of contagion
                 is tightly controlled by the number of connected
                 components in an individual's neighborhood. Such a
                 number is termed structural diversity of an individual,
                 and it is shown to be a key predictor in the social
                 contagion process. Based on this, a fundamental issue
                 in a social network is to find the top-$k$ users with
                 the highest structural diversities. In this paper, we,
                 for the first time, study the top-$k$ structural diversity
                 search problem in a large network. Specifically, we
                 study two types of structural diversity measures,
                 namely, component-based structural diversity measure
                 and core-based structural diversity measure. For
                 component-based structural diversity, we develop an
                 effective upper bound of structural diversity for
                 pruning the search space. The upper bound can be
                 incrementally refined in the search process. Based on
                 such an upper bound, we propose an efficient framework
                 for top-$k$ structural diversity search. To further speed up
                 the structural diversity evaluation in the search
                 process, several carefully devised search strategies
                 are proposed. We also design efficient techniques to
                 handle frequent updates in dynamic networks and
                 maintain the top-$k$ results. We further show how the
                 techniques proposed in component-based structural
                 diversity measure can be extended to handle the
                 core-based structural diversity measure. Extensive
                 experimental studies are conducted in real-world large
                 networks and synthetic graphs, and the results
                 demonstrate the efficiency and effectiveness of the
                 proposed methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
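
%%% A direct sketch (Python) of the component-based measure defined above:
%%% a node's structural diversity is the number of connected components in
%%% the subgraph induced by its neighborhood. The paper's contribution is
%%% avoiding this per-node computation via upper bounds and pruning; this
%%% naive version simply evaluates every node.
%%%
%%%   import heapq
%%%
%%%   def structural_diversity(graph, v):
%%%       """graph: dict node -> set of neighbors."""
%%%       nbrs = set(graph[v])
%%%       seen, components = set(), 0
%%%       for u in nbrs:
%%%           if u in seen:
%%%               continue
%%%           components += 1
%%%           frontier = [u]          # DFS restricted to the neighborhood
%%%           while frontier:
%%%               x = frontier.pop()
%%%               if x in seen:
%%%                   continue
%%%               seen.add(x)
%%%               frontier.extend(w for w in graph[x] if w in nbrs)
%%%       return components
%%%
%%%   def topk_structural_diversity(graph, k):
%%%       return heapq.nlargest(
%%%           k, graph, key=lambda v: structural_diversity(graph, v))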

@Article{Papapetrou:2015:SDS,
  author =       "Odysseas Papapetrou and Minos Garofalakis and Antonios
                 Deligiannakis",
  title =        "Sketching distributed sliding-window data streams",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "3",
  pages =        "345--368",
  month =        jun,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0380-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 15 17:21:03 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "While traditional data management systems focus on
                 evaluating single, ad hoc queries over static data sets
                 in a centralized setting, several emerging applications
                 require (possibly, continuous) answers to queries on
                 dynamic data that is widely distributed and constantly
                 updated. Furthermore, such query answers often need to
                 discount data that is ``stale'' and operate solely on a
                 sliding window of recent data arrivals (e.g., data
                 updates occurring over the last 24 h). Such distributed
                 data streaming applications mandate novel algorithmic
                 solutions that are both time and space efficient (to
                 manage high-speed data streams) and also communication
                 efficient (to deal with physical data distribution). In
                 this paper, we consider the problem of complex query
                 answering over distributed, high-dimensional data
                 streams in the sliding-window model. We introduce a
                 novel sketching technique (termed ECM-sketch) that
                 allows effective summarization of streaming data over
                 both time-based and count-based sliding windows with
                 probabilistic accuracy guarantees. Our sketch structure
                 enables point, as well as inner product, queries and
                 can be employed to address a broad range of problems,
                 such as maintaining frequency statistics, finding heavy
                 hitters, and computing quantiles in the sliding-window
                 model. Focusing on distributed environments, we
                 demonstrate how ECM-sketches of individual, local
                 streams can be composed to generate a (low-error)
                 ECM-sketch summary of the order-preserving merging of
                 all streams; furthermore, we show how ECM-sketches can
                 be exploited for continuous monitoring of
                 sliding-window queries over distributed streams. Our
                 extensive experimental study with two real-life data
                 sets validates our theoretical claims and verifies the
                 effectiveness of our techniques. To the best of our
                 knowledge, ours is the first work to address efficient,
                 guaranteed-error complex query answering over
                 distributed data streams in the sliding-window model.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
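
%%% A sketch (Python) of the plain Count-Min structure underlying the
%%% ECM-sketch: the paper replaces each Count-Min counter with a
%%% sliding-window counter (an exponential histogram) to answer windowed
%%% queries; that windowing layer is omitted here, and all names are
%%% hypothetical.
%%%
%%%   import random
%%%
%%%   class CountMin:
%%%       def __init__(self, width, depth, seed=0):
%%%           rng = random.Random(seed)
%%%           self.width = width
%%%           self.salts = [rng.getrandbits(64) for _ in range(depth)]
%%%           self.rows = [[0] * width for _ in range(depth)]
%%%
%%%       def add(self, item, count=1):
%%%           for row, salt in zip(self.rows, self.salts):
%%%               row[hash((salt, item)) % self.width] += count
%%%
%%%       def estimate(self, item):
%%%           # an overestimate, with probabilistic error guarantees
%%%           return min(row[hash((salt, item)) % self.width]
%%%                      for row, salt in zip(self.rows, self.salts))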

@Article{Yuan:2015:EDS,
  author =       "Ye Yuan and Guoren Wang and Jeffery Yu Xu and Lei
                 Chen",
  title =        "Efficient distributed subgraph similarity matching",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "3",
  pages =        "369--394",
  month =        jun,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0381-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 15 17:21:03 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a query graph qq and a data graph GG, subgraph
                 similarity matching is to retrieve all matches of qq in
                 GG with the number of missing edges bounded by a given
                 threshold $ \epsilon \in ? $. Many works have been
                 conducted to study the problem of subgraph similarity
                 matching due to its ability to handle applications
                 involved with noisy or erroneous graph data. In
                 practice, a data graph can be extremely large, e.g., a
                 web-scale graph containing hundreds of millions of
                 vertices and billions of edges. The state-of-the-art
                 approaches employ centralized algorithms to process the
                 subgraph similarity queries, and thus, they are
                 infeasible for such a large graph due to the limited
                 computational power and storage space of a centralized
                 server. To address this problem, in this paper, we
                 investigate subgraph similarity matching for a
                 web-scale graph deployed in a distributed environment.
                 We propose distributed algorithms and optimization
                 techniques that exploit the properties of subgraph
                 similarity matching, so that we can well utilize the
                 parallel computing power and lower the communication
                 cost among the distributed data centers for query
                 processing. Specifically, we first relax and decompose
                 $q$ into a minimum number of sub-queries. Next, we send
                 each sub-query to conduct the exact matching in
                 parallel. Finally, we schedule and join the exact
                 matches to obtain final query answers. Moreover, our
                 workload-balance strategy further speeds up the query
                 processing. Our experimental results demonstrate the
                 feasibility of our proposed approach in performing
                 subgraph similarity matching over web-scale graph
                 data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mirylenka:2015:CHH,
  author =       "Katsiaryna Mirylenka and Graham Cormode and Themis
                 Palpanas and Divesh Srivastava",
  title =        "Conditional heavy hitters: detecting interesting
                 correlations in data streams",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "3",
  pages =        "395--414",
  month =        jun,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0382-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 15 17:21:03 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The notion of heavy hitters--items that make up a
                 large fraction of the population--has been successfully
                 used in a variety of applications across sensor and
                 RFID monitoring, network data analysis, event mining,
                 and more. Yet this notion often fails to capture the
                 semantics we desire when we observe data in the form of
                 correlated pairs. Here, we are interested in items that
                 are conditionally frequent: when a particular item is
                 frequent within the context of its parent item. In this
                 work, we introduce and formalize the notion of
                 conditional heavy hitters to identify such items, with
                 applications in network monitoring and Markov chain
                 modeling. We explore the relationship between
                 conditional heavy hitters and other related notions in
                 the literature, and show analytically and
                 experimentally the usefulness of our approach. We
                 introduce several algorithm variations that allow us to
                 efficiently find conditional heavy hitters for input
                 data with very different characteristics, and provide
                 analytical results for their performance. Finally, we
                 perform experimental evaluations with several synthetic
                 and real datasets to demonstrate the efficacy of our
                 methods and to study the behavior of the proposed
                 algorithms for different types of data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
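
%%% An exact offline counterpart (Python) of the notion defined above: a
%%% (parent, child) pair is a conditional heavy hitter when the child is
%%% frequent within its parent's context. The paper's algorithms
%%% approximate this in one pass over a stream with bounded memory; the
%%% names here are hypothetical.
%%%
%%%   from collections import Counter
%%%
%%%   def conditional_heavy_hitters(pairs, phi):
%%%       """pairs: iterable of (parent, child); phi: threshold in (0, 1).
%%%       Returns the pairs with empirical P(child | parent) > phi."""
%%%       parent_n, pair_n = Counter(), Counter()
%%%       for p, c in pairs:
%%%           parent_n[p] += 1
%%%           pair_n[(p, c)] += 1
%%%       return [(p, c) for (p, c), n in pair_n.items()
%%%               if n / parent_n[p] > phi]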

@Article{Gao:2015:ECP,
  author =       "Yunjun Gao and Lu Chen and Xinhan Li and Bin Yao and
                 Gang Chen",
  title =        "Efficient $ k k$-closest pair queries in general
                 metric spaces",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "3",
  pages =        "415--439",
  month =        jun,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0383-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 15 17:21:03 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given two object sets PP and QQ, a k-closest
                 pair(k\hbox {CP})(kCP)query finds kk closest object
                 pairs from P\times QP$ \times $Q. This operation is
                 common in many real-life applications such as GIS, data
                 mining, and recommender systems. Although it has
                 received much attention in the Euclidean space, there
                 is little prior work on the metric space. In this
                 paper, we study the problem of kCP query processing in
                 general metric spaces, namely Metric kCP(\hbox
                 {M}k\hbox {CP})(MkCP)search, and propose several
                 efficient algorithms using dynamic disk-based metric
                 indexes (e.g., M-tree), which can be applied to
                 arbitrary type of data as long as a certain metric
                 distance is defined and satisfies the triangle
                 inequality. Our approaches follow depth-first and/or
                 best-first traversal paradigm(s), employ effective
                 pruning rules based on metric space properties and the
                 counting information preserved in the metric index,
                 take advantage of aggressive pruning and compensation
                 to further boost query efficiency, and derive a
                 node-based cost model for \hbox {M}k\hbox {CP}MkCP
                 retrieval. In addition, we extend our techniques to
                 tackle two interesting variants of \hbox {M}k\hbox
                 {CP}MkCP queries. Extensive experiments with both real
                 and synthetic data sets demonstrate the performance of
                 our proposed algorithms, the effectiveness of our
                 developed pruning rules, and the accuracy of our
                 presented cost model.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
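
%%% A brute-force baseline plus the core metric pruning idea (Python) for
%%% kCP search: scan all pairs, or skip a pair once the triangle inequality
%%% proves it cannot beat the current k-th best distance. The paper's
%%% algorithms organize such pruning over M-tree nodes; the names here are
%%% hypothetical.
%%%
%%%   import heapq
%%%
%%%   def kcp_bruteforce(P, Q, dist, k):
%%%       """Returns the k smallest (distance, i, j) tuples over P x Q."""
%%%       pairs = ((dist(p, q), i, j)
%%%                for i, p in enumerate(P) for j, q in enumerate(Q))
%%%       return heapq.nsmallest(k, pairs, key=lambda t: t[0])
%%%
%%%   def prunable(d_p_pivot, d_q_pivot, kth_best):
%%%       # triangle inequality: d(p, q) >= |d(p, c) - d(q, c)| for any
%%%       # pivot c, so this pair can never improve the k-th best
%%%       return abs(d_p_pivot - d_q_pivot) >= kth_best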

@Article{Aksoy:2015:RPE,
  author =       "Cem Aksoy and Aggeliki Dimitriou and Dimitri
                 Theodoratos",
  title =        "Reasoning with patterns to effectively answer {XML}
                 keyword queries",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "3",
  pages =        "441--465",
  month =        jun,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0384-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 15 17:21:03 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Keyword search is a popular technique for searching
                 tree-structured data on the Web because it frees the
                 user from knowing a complex query language and the
                 structure of the data sources. However, the imprecision
                 of the keyword queries usually results in a very large
                 number of results of which only a few are relevant to
                 the query. Multiple previous approaches have tried to
                 address this problem. They exploit the structural
                 properties of the tree data in order to filter out
                 irrelevant results. This is not an easy task though,
                 and in the general case, these approaches show low
                 precision and/or recall and low quality of result
                 ranking. In this paper, we argue that exploiting the
                 structural relationships of the query matches locally
                 in the data tree is not sufficient and a global
                 analysis of the keyword matches in the data tree is
                 necessary in order to assign meaningful semantics to
                 keyword queries. We present an original approach for
                 answering keyword queries which extracts structural
                 patterns of the query matches and reasons with them in
                 order to return meaningful results ranked with respect
                 to their relevance to the query. Comparisons between
                 patterns are realized based on different types of
                 homomorphisms between patterns. As the number of
                 patterns is typically much smaller than that of the
                 query matches, this global reasoning is feasible. We
                 design an efficient stack-based algorithm for
                 evaluating keyword queries on tree-structured data, and
                 we also devise a heuristic extension which further
                 improves its performance. We run comprehensive
                 experiments on different datasets to evaluate the
                 efficiency of the algorithms and the effectiveness of
                 our ranking and filtering semantics. The experimental
                 results show that our approach produces results of
                 higher quality compared to previous ones and our
                 algorithms are fast and scale well with respect to the
                 input and output size.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Roy:2015:TAO,
  author =       "Senjuti Basu Roy and Ioanna Lykourentzou and Saravanan
                 Thirumuruganathan and Sihem Amer-Yahia and Gautam Das",
  title =        "Task assignment optimization in knowledge-intensive
                 crowdsourcing",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "4",
  pages =        "467--491",
  month =        aug,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0385-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Aug 8 13:52:45 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We present SmartCrowd, a framework for optimizing task
                 assignment in knowledge-intensive crowdsourcing (KI-C).
                 SmartCrowd distinguishes itself by formulating, for the
                 first time, the problem of worker-to-task assignment in
                 KI-C as an optimization problem, by proposing efficient
                 adaptive algorithms to solve it and by accounting for
                 human factors, such as worker expertise, wage
                 requirements, and availability inside the optimization
                 process. We present rigorous theoretical analyses of
                 the task assignment optimization problem and propose
                 optimal and approximation algorithms with guarantees,
                 which rely on index pre-computation and adaptive
                 maintenance. We perform extensive performance and
                 quality experiments using real and synthetic data to
                 demonstrate that the SmartCrowd approach is necessary
                 to achieve efficient task assignments of high-quality
                 under guaranteed cost budget.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bao:2015:GFR,
  author =       "Zhifeng Bao and Yong Zeng and Tok Wang Ling and
                 Dongxiang Zhang and Guoliang Li and H. V. Jagadish",
  title =        "A general framework to resolve the {MisMatch} problem
                 in {XML} keyword search",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "4",
  pages =        "493--518",
  month =        aug,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0386-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Aug 8 13:52:45 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "When users issue a query to a database, they have
                 expectations about the results. If what they search for
                 is unavailable in the database, the system will return
                 an empty result or, worse, erroneous mismatch results.
                 We call this problem the MisMatch problem. In this
                 paper, we solve the MisMatch problem in the context of
                 XML keyword search. Our solution is based on two novel
                 concepts that we introduce: target node type and
                 Distinguishability. Target Node Type represents the
                 type of node a query result intends to match, and
                 Distinguishability is used to measure the importance of
                 the query keywords. Using these concepts, we develop a
                 low-cost post-processing algorithm on the results of
                 query evaluation to detect the MisMatch problem and
                 generate helpful suggestions to users. Our approach has
                 three noteworthy features: (1) for queries with the
                 MisMatch problem, it generates the explanation,
                 suggested queries and their sample results as the
                 output to users, helping users judge whether the
                 MisMatch problem is solved without reading all query
                 results; (2) it is portable as it can work with any
                 lowest common ancestor-based matching semantics (for
                 XML data without ID references) or minimal Steiner
                 tree-based matching semantics (for XML data with ID
                 references) which return tree structures as results. It
                 is orthogonal to the choice of result retrieval method
                 adopted; (3) it is lightweight in the way that it
                 occupies a very small proportion of the whole query
                 evaluation time. Extensive experiments on three real
                 datasets verify the effectiveness, efficiency and
                 scalability of our approach. A search engine called
                 XClear has been built and is available at
                 http://xclear.comp.nus.edu.sg.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kotsifakos:2015:EBS,
  author =       "Alexios Kotsifakos and Isak Karlsson and Panagiotis
                 Papapetrou and Vassilis Athitsos and Dimitrios
                 Gunopulos",
  title =        "Embedding-based subsequence matching with
                 gaps--range--tolerances: a {Query-By-Humming}
                 application",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "4",
  pages =        "519--536",
  month =        aug,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0387-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Aug 8 13:52:45 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We present a subsequence matching framework that
                 allows for gaps in both query and target sequences,
                 employs variable matching tolerance efficiently tuned
                 for each query and target sequence, and constrains the
                 maximum matching range. Using this framework, a dynamic
                 programming method is proposed, called SMBGT, that,
                 given a short query sequence Q and a large database,
                 identifies in quadratic time the subsequence of the
                 database that best matches Q. SMBGT is highly
                 applicable to music retrieval. However, in
                 Query-By-Humming applications, runtime is critical.
                 Hence, we propose a novel embedding-based approach,
                 called ISMBGT, for speeding up search under SMBGT.
                 Using a set of reference sequences, ISMBGT maps both Q
                 and each position of each database sequence into
                 vectors. The database vectors closest to the query
                 vector are identified, and SMBGT is then applied
                 between Q and the subsequences that correspond to those
                 database vectors. The key novelties of ISMBGT are that
                 it does not require training, it is query sensitive,
                 and it exploits the flexibility of SMBGT. We present an
                 extensive experimental evaluation using synthetic and
                 hummed queries on a large music database. Our findings
                 show that ISMBGT can achieve speedups of up to an order
                 of magnitude against brute-force search and over an
                 order of magnitude against cDTW, while maintaining a
                 retrieval accuracy very close to that of brute-force
                 search.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Skovsgaard:2015:FTR,
  author =       "Anders Skovsgaard and Christian S. Jensen",
  title =        "Finding top-$k$ relevant groups of spatial web
                 objects",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "4",
  pages =        "537--555",
  month =        aug,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0388-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Aug 8 13:52:45 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The web is increasingly being accessed from
                 geo-positioned devices such as smartphones, and rapidly
                 increasing volumes of web content are geo-tagged. In
                 addition, studies show that a substantial fraction of
                 all web queries has local intent. This development
                 motivates the study of advanced spatial keyword-based
                 querying of web content. Previous research has
                 primarily focused on the retrieval of the top-k
                 individual spatial web objects that best satisfy a
                 query specifying a location and a set of keywords. This
                 paper proposes a new type of query functionality that
                 returns top-k groups of objects while taking into
                 account aspects such as group density, distance to the
                 query, and relevance to the query keywords. To enable
                 efficient processing, novel indexing and query
                 processing techniques for single and multiple keyword
                 queries are proposed. Empirical performance studies
                 with an implementation of the techniques and real data
                 suggest that the proposals are viable in practical
                 settings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Abedjan:2015:PRD,
  author =       "Ziawasch Abedjan and Lukasz Golab and Felix Naumann",
  title =        "Profiling relational data: a survey",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "4",
  pages =        "557--581",
  month =        aug,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0389-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Aug 8 13:52:45 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Profiling data to determine metadata about a given
                 dataset is an important and frequent activity of any IT
                 professional and researcher and is necessary for
                 various use-cases. It encompasses a vast array of
                 methods to examine datasets and produce metadata. Among
                 the simpler results are statistics, such as the number
                 of null values and distinct values in a column, its
                 data type, or the most frequent patterns of its data
                 values. Metadata that are more difficult to compute
                 involve multiple columns, namely correlations, unique
                 column combinations, functional dependencies, and
                 inclusion dependencies. Further techniques detect
                 conditional properties of the dataset at hand. This
                 survey provides a classification of data profiling
                 tasks and comprehensively reviews the state of the art
                 for each class. In addition, we review data profiling
                 tools and systems from research and industry. We
                 conclude with an outlook on the future of data
                 profiling beyond traditional profiling tasks and beyond
                 relational databases.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Deutch:2015:PBA,
  author =       "Daniel Deutch and Yuval Moskovitch and Val Tannen",
  title =        "Provenance-based analysis of data-centric processes",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "4",
  pages =        "583--607",
  month =        aug,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0390-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Aug 8 13:52:45 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We consider in this paper static analysis of the
                 possible executions of data-dependent applications,
                 namely applications whose control flow is guided by a
                 finite-state machine, as well as by the state of an
                 underlying database. We note that previous work in this
                 context has not addressed two important features of
                 such analysis, namely analysis under hypothetical
                 scenarios, such as changes to the application's state
                 machine and/or to the underlying database, and the
                 consideration of meta-data, such as cost or access
                 privileges. Observing that semiring-based provenance
                 has been proven highly effective in supporting these
                 two features for database queries, we develop in this
                 paper a semiring-based provenance framework for the
                 analysis of data-dependent processes, accounting for
                 hypothetical reasoning and meta-data. The development
                 addresses two interacting new challenges: (1) combining
                 provenance annotations for both information that
                 resides in the database and information about external
                 inputs (e.g., user choices) and (2) finitely capturing
                 infinitely many process executions. We have implemented
                 our framework as part of the PROPOLIS system.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bohlen:2015:SIB,
  author =       "Michael H. B{\"o}hlen and Christoph Koch",
  title =        "Special issue on best papers of {VLDB 2013}",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "5",
  pages =        "609--610",
  month =        oct,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0401-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 18 06:51:09 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yan:2015:ALK,
  author =       "Zhepeng Yan and Nan Zheng and Zachary G. Ives and
                 Partha Pratim Talukdar and Cong Yu",
  title =        "Active learning in keyword search-based data
                 integration",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "5",
  pages =        "611--631",
  month =        oct,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0374-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 18 06:51:09 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The problem of scaling up data integration, such that
                 new sources can be quickly utilized as they are
                 discovered, remains elusive: Global schemas for
                 integrated data are difficult to develop and expand,
                 and schema and record matching techniques are limited
                 by the fact that data and metadata are often
                 under-specified and must be disambiguated by data
                 experts. One promising approach is to avoid using a
                 global schema, and instead to develop keyword
                 search-based data integration--where the system lazily
                 discovers associations enabling it to join together
                 matches to keywords, and return ranked results. The
                 user is expected to understand the data domain and
                 provide feedback about answers' quality. The system
                 generalizes such feedback to learn how to correctly
                 integrate data. A major open challenge is that under
                 this model, the user only sees and offers feedback on a
                 few ``top-$k$'' results: This result set must be
                 carefully selected to include answers of high relevance
                 and answers that are highly informative when feedback
                 is given on them. Existing systems merely focus on
                 predicting relevance, by composing the scores of
                 various schema and record matching algorithms. In this
                 paper, we show how to predict the uncertainty
                 associated with a query result's score, as well as how
                 informative feedback is on a given result. We build
                 upon these foundations to develop an active learning
                 approach to keyword search-based data integration, and
                 we validate the effectiveness of our solution over real
                 data from several very different domains.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zou:2015:CDA,
  author =       "Tao Zou and Ronan Bras and Marcos Vaz Salles and Alan
                 Demers and Johannes Gehrke",
  title =        "{ClouDiA}: a deployment advisor for public clouds",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "5",
  pages =        "633--653",
  month =        oct,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0375-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 18 06:51:09 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "An increasing number of distributed data-driven
                 applications are moving into shared public clouds. By
                 sharing resources and operating at scale, public clouds
                 promise higher utilization and lower costs than private
                 clusters. To achieve high utilization, however, cloud
                 providers inevitably allocate virtual machine instances
                 non-contiguously; i.e., instances of a given
                 application may end up in physically distant machines
                 in the cloud. This allocation strategy can lead to
                 large differences in average latency between instances.
                 For a large class of applications, this difference can
                 result in significant performance degradation, unless
                 care is taken in how application components are mapped
                 to instances. In this paper, we propose ClouDiA, a
                 general deployment advisor that selects application
                 node deployments minimizing either (i) the largest
                 latency between application nodes, or (ii) the longest
                 critical path among all application nodes. ClouDiA
                 employs a number of algorithmic techniques, including
                 mixed-integer programming and constraint programming
                 techniques, to efficiently search the space of possible
                 mappings of application nodes to instances. Through
                 experiments with synthetic and real applications in
                 Amazon EC2, we show that mean latency is a robust
                 metric to model communication cost in these
                 applications and that our search techniques yield a
                 15--55\% reduction in time-to-solution or service
                 response time, without any need for modifying
                 application code.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhu:2015:SAP,
  author =       "Fanwei Zhu and Yuan Fang and Kevin Chen-Chuan Chang
                 and Jing Ying",
  title =        "Scheduled approximation for {Personalized PageRank}
                 with {Utility-based Hub Selection}",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "5",
  pages =        "655--679",
  month =        oct,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0376-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 18 06:51:09 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "As Personalized PageRank has been widely leveraged for
                 ranking on a graph, the efficient computation of
                 Personalized PageRank Vector (PPV) becomes a prominent
                 issue. In this paper, we propose FastPPV, an
                 approximate PPV computation algorithm that is
                 incremental and accuracy-aware. Our approach hinges on
                 a novel paradigm of scheduled approximation: the
                 computation is partitioned and scheduled for processing
                 in an ``organized'' way, such that we can gradually
                 improve our PPV estimation in an incremental manner and
                 quantify the accuracy of our approximation at query
                 time. Guided by this principle, we develop an efficient
                 hub-based realization, where we adopt the metric of hub
                 length to partition and schedule random walk tours so
                 that the approximation error reduces exponentially over
                 iterations. In addition, as tours are segmented by
                 hubs, the shared substructures between different tours
                 (around the same hub) can be reused to speed up query
                 processing both within and across iterations. Given the
                 key roles played by the hubs, we further investigate
                 the problem of hub selection. In particular, we develop
                 a conceptual model to select hubs based on the two
                 desirable properties of hubs--sharing and
                 discriminating, and present several different
                 strategies to realize the conceptual model. Finally, we
                 evaluate FastPPV over two real-world graphs, and show
                 that it not only significantly outperforms two
                 state-of-the-art baselines in both online and offline
                 phases, but also scales well on larger graphs. In
                 particular, we are able to achieve near-constant time
                 online query processing irrespective of graph size.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ren:2015:VLM,
  author =       "Kun Ren and Alexander Thomson and Daniel J. Abadi",
  title =        "{VLL}: a lock manager redesign for main memory
                 database systems",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "5",
  pages =        "681--705",
  month =        oct,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-014-0377-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Sep 18 06:51:09 MDT 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Lock managers are increasingly becoming a bottleneck
                 in database systems that use pessimistic concurrency
                 control. In this paper, we introduce very lightweight
                 locking (VLL), an alternative approach to pessimistic
                 concurrency control for main memory database systems,
                 which avoids almost all overhead associated with
                 traditional lock manager operations. We also propose a
                 protocol called selective contention analysis (SCA),
                 which enables systems implementing VLL to achieve high
                 transactional throughput under high-contention
                 workloads. We implement these protocols both in a
                 traditional single-machine multi-core database server
                 setting and in a distributed database where data are
                 partitioned across many commodity machines in a
                 shared-nothing cluster. Furthermore, we show how VLL
                 and SCA can be extended to enable range locking. Our
                 experiments show that VLL dramatically reduces locking
                 overhead and thereby increases transactional throughput
                 in both settings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Galarraga:2015:FRM,
  author =       "Luis Gal{\'a}rraga and Christina Teflioudi and Katja
                 Hose and Fabian M. Suchanek",
  title =        "Fast rule mining in ontological knowledge bases with
                 {AMIE++}",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "6",
  pages =        "707--730",
  month =        dec,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0394-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Nov 25 15:38:42 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Recent advances in information extraction have led to
                 huge knowledge bases (KBs), which capture knowledge in
                 a machine-readable format. Inductive logic programming
                 (ILP) can be used to mine logical rules from these KBs,
                 such as ``If two persons are married, then they
                 (usually) live in the same city.'' While ILP is a
                 mature field, mining logical rules from KBs is
                 difficult, because KBs make an open-world assumption.
                 This means that absent information cannot be taken as
                 counterexamples. Our approach AMIE (Gal&\#225;rraga et
                 al. in WWW, 2013) has shown how rules can be mined
                 effectively from KBs even in the absence of
                 counterexamples. In this paper, we show how this
                 approach can be optimized to mine even larger KBs with
                 more than 12M statements. Extensive experiments show
                 how our new approach, AMIE$^+$, extends to areas of
                 mining that were previously beyond reach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chandra:2015:DGT,
  author =       "Bikash Chandra and Bhupesh Chawda and Biplab Kar and
                 K. V. Reddy and Shetal Shah and S. Sudarshan",
  title =        "Data generation for testing and grading {SQL}
                 queries",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "6",
  pages =        "731--755",
  month =        dec,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0395-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Nov 25 15:38:42 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Correctness of SQL queries is usually tested by
                 executing the queries on one or more datasets.
                 Erroneous queries are often the results of small
                 changes or mutations of the correct query. A mutation
                 Q'`? of a query Q is killed by a dataset D if Q(D) \ne
                 `? Q'`?(D). Earlier work on the XData system showed how
                 to generate datasets that kill all mutations in a class
                 of mutations that included join type and comparison
                 operation mutations. In this paper, we extend the XData
                 data generation techniques to handle a wider variety of
                 SQL queries and a much larger class of mutations. We
                 have also built a system for grading SQL queries using
                 the datasets generated by XData. We present a study of
                 the effectiveness of the datasets generated by the
                 extended XData approach, using a variety of queries
                 including queries submitted by students as part of a
                 database course. We show that the XData datasets
                 outperform predefined datasets as well as manual
                 grading done earlier by teaching assistants, while also
                 avoiding the drudgery of manual correction. Thus, we
                 believe that our techniques will be of great value to
                 database course instructors and TAs, particularly to
                 those of MOOCs. It will also be valuable to database
                 application developers and testers for testing SQL
                 queries.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2015:MMO,
  author =       "Chao Li and Gerome Miklau and Michael Hay and Andrew
                 McGregor and Vibhor Rastogi",
  title =        "The matrix mechanism: optimizing linear counting
                 queries under differential privacy",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "6",
  pages =        "757--781",
  month =        dec,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0398-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Nov 25 15:38:42 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Differential privacy is a robust privacy standard that
                 has been successfully applied to a range of data
                 analysis tasks. We describe the matrix mechanism, an
                 algorithm for answering a workload of linear counting
                 queries that adapts the noise distribution to
                 properties of the provided queries. Given a workload,
                 the mechanism uses a different set of queries, called a
                 query strategy, which are answered using a standard
                 Laplace or Gaussian mechanism. Noisy answers to the
                 workload queries are then derived from the noisy
                 answers to the strategy queries. This two-stage process
                 can result in a more complex, correlated noise
                 distribution that preserves differential privacy but
                 increases accuracy. We provide a formal analysis of the
                 error of query answers produced by the mechanism and
                 investigate the problem of computing the optimal query
                 strategy in support of a given workload. We show that
                 this problem can be formulated as a rank-constrained
                 semidefinite program. We analyze two seemingly distinct
                 techniques proposed in the literature, whose similar
                 behavior is explained by viewing them as instances of
                 the matrix mechanism. We also describe an extension of
                 the mechanism in which nonnegativity constraints are
                 included in the derivation process and provide
                 experimental evidence of its efficacy.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Armenatzoglou:2015:GSR,
  author =       "Nikos Armenatzoglou and Ritesh Ahuja and Dimitris
                 Papadias",
  title =        "{Geo-Social Ranking}: functions and query processing",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "6",
  pages =        "783--799",
  month =        dec,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0400-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Nov 25 15:38:42 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a query location q, Geo-Social Ranking (GSR)
                 ranks the users of a Geo-Social Network based on their
                 distance to q, the number of their friends in the
                 vicinity of q, and possibly the connectivity of those
                 friends. We propose a general GSR framework and four
                 GSR functions that assign scores in different ways: (i)
                 LC, which is a weighted linear combination of social
                 (i.e., friendships) and spatial (i.e., distance to q)
                 aspects, (ii) RC, which is a ratio combination of the
                 two aspects, (iii) HGS, which considers the number of
                 friends in coincident circles centered at q, and (iv)
                 GST, which takes into account triangles of friends in
                 the vicinity of q. We investigate the behavior of the
                 functions, qualitatively assess their results, and
                 study the effects of their parameters. Moreover, for
                 each ranking function, we design a query processing
                 technique that utilizes its specific characteristics to
                 efficiently retrieve the top-k users. Finally, we
                 experimentally evaluate the performance of the top-k
                 algorithms with real and synthetic datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Santini:2015:QSU,
  author =       "Simone Santini",
  title =        "Querying streams using regular expressions: some
                 semantics, decidability, and efficiency issues",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "6",
  pages =        "801--821",
  month =        dec,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0402-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Nov 25 15:38:42 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper analyzes the decidability and complexity
                 problems that arise when matching regular expressions
                 on infinite streams of sets of symbols. We show that in
                 important application domains, several apparently
                 obvious semantics lead to detecting spurious events
                 (events that are mere artifacts of the semantics) or to
                 missing events of potential interest. We single out a
                 class of semantics, of interest in many applications,
                 which we dub use-and-throw: In a use-and-throw
                 semantics, an elementary event can participate in the
                 creation of at most one detected complex event. Many
                 areas of research have identified this as a desirable
                 requirement (we give the examples of databases and
                 video surveillance), but hitherto there has been no
                 systematic study of the characteristics of these
                 semantics, in particular their decidability and
                 algorithmic complexity. This paper is meant to provide
                 at least some initial answers on this subject. We
                 analyze several semantics, provide polynomial
                 algorithms for them, and prove their correctness and
                 their properties.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2015:ATE,
  author =       "Xiang Wang and Ying Zhang and Wenjie Zhang and Xuemin
                 Lin and Wei Wang",
  title =        "{AP-Tree}: efficiently support location-aware
                 {Publish\slash Subscribe}",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "6",
  pages =        "823--848",
  month =        dec,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0403-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Nov 25 15:38:42 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We investigate the problem of efficiently supporting
                 location-aware Publish/Subscribe (Pub/Sub for short),
                 which is essential in many applications such as
                 location-based recommendation and advertising, thanks
                 to the proliferation of geo-equipped devices and the
                 ensuing location-based social media applications. In a
                 location-aware Pub/Sub system (e.g., an e-coupon
                 system), subscribers can register their interest as
                 spatial-keyword subscriptions (e.g., interest in nearby
                 iphone discount); each incoming geo-textual message
                 (e.g., geo-tagged e-coupon) will be delivered to all
                 the relevant subscribers immediately. While there are
                 several prior approaches aiming at providing efficient
                 processing techniques for this problem, their
                 approaches belong to spatial-prioritized indexing
                 method which cannot well exploit the keyword
                 distribution. In addition, their textual filtering
                 techniques are built upon simple variants of
                 traditional inverted indexes, which do not perform well
                 for the textual constraint imposed by the problem. In
                 this paper, we address the above limitations and
                 provide a highly efficient solution based on a novel
                 adaptive index, named AP-Tree. AP-Tree adaptively
                 groups registered subscriptions using keyword and
                 spatial partitions, guided by a cost model. AP-Tree
                 also naturally indexes ordered keyword combinations.
                 Furthermore, we show that our techniques can be
                 extended to process moving spatial-keyword
                 subscriptions, where subscribers can continuously
                 update their locations. We present efficient algorithms
                 to process both stationary and moving subscriptions,
                 which can seamlessly and effectively integrate keyword
                 and spatial partitions. Our extensive experiments
                 demonstrate that AP-Tree and its variant AP$^{+}$-Tree
                 can achieve up to an order of magnitude improvement on
                 efficiency compared with prior state-of-the-art
                 methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Basik:2015:STS,
  author =       "Fuat Bas{\i}k and Bu{\u{g}}ra Gedik and Hakan
                 Ferhatosmano{\u{g}}lu and Mert Emin Kalender",
  title =        "{S$^{33}$-TM}: scalable streaming short text
                 matching",
  journal =      j-VLDB-J,
  volume =       "24",
  number =       "6",
  pages =        "849--866",
  month =        dec,
  year =         "2015",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0404-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Nov 25 15:38:42 MST 2015",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Micro-blogging services have become major venues for
                 information creation, as well as channels of
                 information dissemination. Accordingly, monitoring them
                 for relevant information is a critical capability. This
                 is typically achieved by registering content-based
                 subscriptions with the micro-blogging service. Such
                 subscriptions are long-running queries that are
                 evaluated against the stream of posts. Given the
                 popularity and scale of micro-blogging services like
                 Twitter and Weibo, building a scalable infrastructure
                 to evaluate these subscriptions is a challenge. To
                 address this challenge, we present the S$^3$-TM system
                 for streaming short text matching. S$^3$-TM is
                 organized
                 as a stream processing application, in the form of a
                 data parallel flow graph designed to be run on a data
                 center environment. It takes advantage of the structure
                 of the publications (posts) and subscriptions to
                 perform the matching in a scalable manner, without
                 broadcasting publications or subscriptions to all of
                 the matcher instances. The basic design of S$^3$-TM
                 uses
                 a scoped multicast for publications and scoped anycast
                 for subscriptions. To further improve throughput, we
                 introduce publication routing algorithms that aim at
                 minimizing the scope of the multicasts. The first set
                 of algorithms we develop is based on partitioning the
                 word co-occurrence frequency graph, with the aim of
                 routing posts that include commonly co-occurring words
                 to a small set of matchers. While effective, these
                 algorithms fell short in balancing the load. To address
                 this, we develop the SALB algorithm, which provides
                 better load balance by modeling the load more
                 accurately using the word-to-post bipartite graph. We
                 also develop a subscription placement algorithm, called
                 LASP, to group together similar subscriptions, in order
                 to minimize the subscription matching cost.
                 Furthermore, to achieve good scalability for increasing
                 number of nodes, we introduce techniques to handle
                 workload skew. Finally, we introduce load shedding
                 techniques for handling unexpected load spikes with
                 small impact on the accuracy. Our experimental results
                 show that S$^3$-TM is scalable. Furthermore, the SALB
                 algorithm provides more than 2.5$\times$ the
                 throughput compared to the baseline multicast and
                 outperforms the graph partitioning-based approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Jagadish:2016:SIB,
  author =       "H. V. Jagadish and Aoying Zhou",
  title =        "Special issue on best papers of {VLDB 2014}",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0399-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 21 17:41:55 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Jiang:2016:EES,
  author =       "Dawei Jiang and Sai Wu and Gang Chen and Beng Chin Ooi
                 and Kian-Lee Tan and Jun Xu",
  title =        "{epiC}: an extensible and scalable system for
                 processing {Big Data}",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "1",
  pages =        "3--26",
  month =        feb,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0393-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 21 17:41:55 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The Big Data problem is characterized by the so-called
                 3V features: volume --- a huge amount of data, velocity
                 --- a high data ingestion rate, and variety --- a mix
                 of structured data, semi-structured data, and
                 unstructured data. The state-of-the-art solutions to
                 the Big Data problem are largely based on the MapReduce
                 framework (aka its open source implementation Hadoop).
                 Although Hadoop handles the data volume challenge
                 successfully, it does not deal with the data variety
                 well since the programming interfaces and its
                 associated data processing model are inconvenient and
                 inefficient for handling structured data and graph
                 data. This paper presents epiC, an extensible system to
                 tackle the Big Data's data variety challenge. epiC
                 introduces a general Actor-like concurrent programming
                 model, independent of the data processing models, for
                 specifying parallel computations. Users process
                 multi-structured datasets with appropriate epiC
                 extensions, and the implementation of a data processing
                 model best suited for the data type and auxiliary code
                 for mapping that data processing model into epiC's
                 concurrent programming model. Like Hadoop, programs
                 written in this way can be automatically parallelized
                 and the runtime system takes care of fault tolerance
                 and inter-machine communications. We present the design
                 and implementation of epiC's concurrent programming
                 model. We also present two customized data processing
                 models, an optimized MapReduce extension and a
                 relational model, on top of epiC. We show how users can
                 leverage epiC to process heterogeneous data by linking
                 different types of operators together. To improve the
                 performance of complex analytic jobs, epiC supports a
                 partition-based optimization technique where data are
                 streamed between the operators to avoid the high I/O
                 overheads. Experiments demonstrate the effectiveness
                 and efficiency of our proposed epiC.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Schuhknecht:2016:EEA,
  author =       "Felix Martin Schuhknecht and Alekh Jindal and Jens
                 Dittrich",
  title =        "An experimental evaluation and analysis of database
                 cracking",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "1",
  pages =        "27--52",
  month =        feb,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0397-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 21 17:41:55 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Database cracking has been an area of active research
                 in recent years. The core idea of database cracking is
                 to create indexes adaptively and incrementally as a
                 side product of query processing. Several works have
                 proposed different cracking techniques for different
                 aspects including updates, tuple reconstruction,
                 convergence, concurrency control, and robustness. Our
                 2014 VLDB paper ``The Uncracked Pieces in Database
                 Cracking'' (PVLDB 7:97--108, 2013/VLDB 2014) was the
                 first comparative study of these different methods by
                 an independent group. In this article, we extend our
                 published experimental study on database cracking and
                 bring it to an up-to-date state. Our goal is to
                 critically review several aspects, identify the
                 potential, and propose promising directions in database
                 cracking. With this study, we hope to expand the scope
                 of database cracking and possibly leverage cracking in
                 database engines other than MonetDB. We repeat several
                 prior database cracking works including the core
                 cracking algorithms as well as three other works on
                 convergence (hybrid cracking), tuple reconstruction
                 (sideways cracking), and robustness (stochastic
                 cracking), respectively. Additionally to our conference
                 paper, we now also look at a recently published study
                 about CPU efficiency (predication cracking). We
                 evaluate these works and show possible directions to do
                 even better. As a further extension, we evaluate the
                 whole class of parallel cracking algorithms that were
                 proposed in three recent works. Altogether, in this
                 work we revisit 8 papers on database cracking and
                 evaluate in total 18 cracking methods, 6 sorting
                 algorithms, and 3 full index structures. Additionally,
                 we test cracking under a variety of experimental
                 settings, including high selectivity (low selectivity
                 means that many entries qualify; consequently, high
                 selectivity means that only few entries qualify)
                 queries, low selectivity queries, varying selectivity,
                 and multiple query access patterns. Finally, we compare
                 cracking against different sorting algorithms as well
                 as against different main memory optimized indexes,
                 including the recently proposed adaptive radix tree
                 (ART). Our results show that: (1) the previously
                 proposed cracking algorithms are repeatable, (2) there
                 is still enough room to significantly improve the
                 previously proposed cracking algorithms, (3)
                 parallelizing cracking algorithms efficiently is a hard
                 task, (4) cracking depends heavily on query
                 selectivity, (5) cracking needs to catch up with modern
                 indexing trends, and (6) different indexing algorithms
                 have different indexing signatures.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Jugel:2016:VAV,
  author =       "Uwe Jugel and Zbigniew Jerzak and Gregor Hackenbroich
                 and Volker Markl",
  title =        "{VDDA}: automatic visualization-driven data
                 aggregation in relational databases",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "1",
  pages =        "53--77",
  month =        feb,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0396-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 21 17:41:55 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Contemporary RDBMS-based systems for visualization of
                 high-volume numerical data have difficulty to cope with
                 the hard latency requirements and high ingestion rates
                 of interactive visualizations. Existing solutions for
                 lowering the volume of large data sets disregard the
                 spatial properties of visualizations, resulting in
                 visualization errors. In this work, we introduce VDDA,
                 a visualization-driven data aggregation that models
                 visual aggregation at the pixel level as data
                 aggregation at the query level. Based on the M4
                 aggregation for producing pixel-perfect line charts
                 from highly reduced data subsets, we define a complete
                 set of data reduction operators that simulate the
                 overplotting behavior of the most frequently used chart
                 types. Relying only on the relational algebra and the
                 common data aggregation functions, our approach is
                 generic and applicable to any visualization system that
                 consumes data stored in relational databases. We
                 demonstrate our visualization-driven data aggregation
                 using real-world data sets from high-tech
                 manufacturing, stock markets, and sports analytics,
                 reducing data volumes by up to two orders of magnitude,
                 while preserving pixel-perfect visualizations, as
                 producible from the raw data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
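
%%% M4, the aggregation underlying VDDA, keeps per pixel column only
%%% the tuples carrying the minimum and maximum timestamp and the
%%% minimum and maximum value, which is all a line chart can display.
%%% A minimal in-memory Python sketch over (t, v) pairs with an
%%% assumed chart width; the paper formulates the same computation as
%%% relational queries.
%%%
%%%     def m4(points, width, t_min, t_max):
%%%         """points: iterable of (t, v); returns a reduced point list."""
%%%         span = (t_max - t_min) / width
%%%         groups = {}
%%%         for t, v in points:
%%%             k = min(int((t - t_min) / span), width - 1)  # pixel column
%%%             g = groups.setdefault(k, {"first": (t, v), "last": (t, v),
%%%                                       "min": (t, v), "max": (t, v)})
%%%             if t < g["first"][0]: g["first"] = (t, v)
%%%             if t > g["last"][0]:  g["last"] = (t, v)
%%%             if v < g["min"][1]:   g["min"] = (t, v)
%%%             if v > g["max"][1]:   g["max"] = (t, v)
%%%         out = set()
%%%         for g in groups.values():
%%%             out.update(g.values())   # at most four tuples per column
%%%         return sorted(out)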

@Article{Wang:2016:EDL,
  author =       "Wei Wang and Xiaoyan Yang and Beng Chin Ooi and
                 Dongxiang Zhang and Yueting Zhuang",
  title =        "Effective deep learning-based multi-modal retrieval",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "1",
  pages =        "79--101",
  month =        feb,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0391-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 21 17:41:55 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Multi-modal retrieval is emerging as a new search
                 paradigm that enables seamless information retrieval
                 from various types of media. For example, users can
                 simply snap a movie poster to search for relevant
                 reviews and trailers. The mainstream solution to the
                 problem is to learn a set of mapping functions that
                 project data from different modalities into a common
                 metric space in which conventional indexing schemes for
                 high-dimensional space can be applied. Since the
                 effectiveness of the mapping functions plays an
                 essential role in improving search quality, in this
                 paper, we exploit deep learning techniques to learn
                 effective mapping functions. In particular, we first
                 propose a general learning objective that effectively
                 captures both intramodal and intermodal semantic
                 relationships of data from heterogeneous sources. Given
                 the general objective, we propose two learning
                 algorithms to realize it: (1) an unsupervised approach
                 that uses stacked auto-encoders and requires minimal
                 prior knowledge of the training data and (2) a
                 supervised approach using a deep convolutional neural
                 network and a neural language model. Our training
                 algorithms are memory efficient with respect to the
                 data volume. Given a large training dataset, we split
                 it into mini-batches and adjust the mapping functions
                 continuously for each batch. Experimental results on
                 three real datasets demonstrate that our proposed
                 methods achieve significant improvement in search
                 accuracy over the state-of-the-art solutions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
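
%%% The memory-efficient training regime described above -- split the
%%% training set into mini-batches and adjust the mapping functions
%%% batch by batch -- is the standard mini-batch loop. A minimal
%%% sketch, where `update` is an assumed stand-in for one gradient
%%% step of either proposed model:
%%%
%%%     def train(data, batch_size, params, update):
%%%         """One pass over `data`, updating `params` per mini-batch."""
%%%         for i in range(0, len(data), batch_size):
%%%             params = update(params, data[i:i + batch_size])
%%%         return params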

@Article{Funke:2016:KPC,
  author =       "Stefan Funke and Andr{\'e} Nusser and Sabine
                 Storandt",
  title =        "On {$k$-Path Covers} and their applications",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "1",
  pages =        "103--123",
  month =        feb,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0392-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jan 21 17:41:55 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "For a directed graph G with vertex set V, we call a
                 subset $ C \subseteq V $ a $k$-(All-)Path Cover if $C$
                 contains a node from any simple path in $G$ consisting
                 of $k$ nodes. This paper considers the problem of
                 constructing small $k$-Path Covers in the context of
                 road networks with millions of nodes and edges. In many
                 application scenarios, the set $C$ and its induced
                 overlay graph constitute a very compact synopsis of
                 $G$, which is the basis for the currently fastest data
                 structure for personalized shortest path queries,
                 visually pleasing overlays of subsampled paths, and
                 efficient reporting, retrieval and aggregation of
                 associated data in spatial network databases. Apart
                 from a theoretic investigation of the problem, we
                 provide efficient algorithms that produce very small
                 $k$-Path Covers for large real-world road networks
                 (with a posteriori guarantees via instance-based lower
                 bounds). We also apply our algorithms to other (social,
                 collaboration, web, etc.) networks and in several
                 instances improve upon previous approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
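
%%% The covering property defined above -- every simple path on $k$
%%% nodes must contain a node of $C$ -- can be checked directly by
%%% enumerating paths that avoid $C$. A brute-force Python sketch,
%%% usable only on small graphs; the paper's contribution is
%%% constructing small covers for million-node networks, which this
%%% check does not attempt.
%%%
%%%     def is_k_path_cover(adj, cover, k):
%%%         """adj: dict node -> successors; True iff no k-node simple
%%%         path avoids `cover`."""
%%%         free = {u for u in adj if u not in cover}
%%%
%%%         def no_long_path(path, seen):
%%%             if len(path) == k:     # a k-node path avoiding the cover
%%%                 return False
%%%             return all(no_long_path(path + [w], seen | {w})
%%%                        for w in adj.get(path[-1], ())
%%%                        if w in free and w not in seen)
%%%
%%%         return all(no_long_path([u], {u}) for u in free)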

@Article{Quamar:2016:NNC,
  author =       "Abdul Quamar and Amol Deshpande and Jimmy Lin",
  title =        "{NScale}: neighborhood-centric large-scale graph
                 analytics in the cloud",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "2",
  pages =        "125--150",
  month =        apr,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0405-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 25 16:34:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "There is an increasing interest in executing complex
                 analyses over large graphs, many of which require
                 processing a large number of multi-hop neighborhoods or
                 subgraphs. Examples include ego network analysis, motif
                 counting, finding social circles, personalized
                 recommendations, link prediction, anomaly detection,
                 analyzing influence cascades, and others. These tasks
                 are not well served by existing vertex-centric graph
                 processing frameworks, where user programs are only
                 able to directly access the state of a single vertex at
                 a time, resulting in high communication, scheduling,
                 and memory overheads in executing such tasks. Further,
                 most existing graph processing frameworks ignore the
                 challenges in extracting the relevant portions of the
                 graph that an analysis task is interested in, and
                 loading them into distributed memory. This paper
                 introduces NScale, a novel end-to-end graph processing
                 framework that enables the distributed execution of
                 complex subgraph-centric analytics over large-scale
                 graphs in the cloud. NScale enables users to write
                 programs at the level of subgraphs rather than at the
                 level of vertices. Unlike most previous graph
                 processing frameworks, which apply the user program to
                 the entire graph, NScale allows users to declaratively
                 specify subgraphs of interest. Our framework includes a
                 novel graph extraction and packing (GEP) module that
                 utilizes a cost-based optimizer to partition and pack
                 the subgraphs of interest into memory on as few
                 machines as possible. The distributed execution engine
                 then takes over and runs the user program in parallel
                 on those subgraphs, restricting the scope of the
                 execution appropriately, and utilizes novel techniques
                 to minimize memory consumption by exploiting overlaps
                 among the subgraphs. We present a comprehensive
                 empirical evaluation comparing against three
                 state-of-the-art systems, namely Giraph, GraphLab, and
                 GraphX, on several real-world datasets and a variety of
                 analysis tasks. Our experimental results show
                 orders-of-magnitude improvements in performance and
                 drastic reductions in the cost of analytics compared to
                 vertex-centric approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Soule:2016:RAS,
  author =       "Robert Soul{\'e} and Bugra Gedik",
  title =        "{RailwayDB}: adaptive storage of interaction graphs",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "2",
  pages =        "151--169",
  month =        apr,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0407-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 25 16:34:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We are living in an ever more connected world, where
                 data recording the interactions between people,
                 software systems, and the physical world is becoming
                 increasingly prevalent. These data often take the form
                 of a temporally evolving graph, where entities are the
                 vertices and the interactions between them are the
                 edges. We call such graphs interaction graphs. Various
                 domains, including telecommunications, transportation,
                 and social media, depend on analytics performed on
                 interaction graphs. The ability to efficiently support
                 historical analysis over interaction graphs requires
                 effective solutions for the problem of data layout on
                 disk. This paper presents an adaptive disk layout
                 called the railway layout for optimizing disk block
                 storage for interaction graphs. The key idea is to
                 divide blocks into one or more sub-blocks. Each
                 sub-block contains the entire graph structure, but only
                 a subset of the attributes. This improves query I/O, at
                 the cost of increased storage overhead. We introduce
                 optimal integer linear program (ILP) formulations for
                 partitioning disk blocks into sub-blocks with
                 overlapping and nonoverlapping attributes.
                 Additionally, we present greedy heuristics that can
                 scale better compared to the ILP alternatives, yet
                 achieve close to optimal query I/O. We provide an
                 implementation of the railway layout as part of
                 RailwayDB--an open-source graph database we have
                 developed. To demonstrate the benefits of the railway
                 layout, we provide an extensive experimental
                 evaluation, including model-based as well as empirical
                 results comparing our approach to baseline
                 alternatives.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
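
%%% Under the railway layout with nonoverlapping sub-blocks, a query
%%% must read every sub-block holding at least one attribute it
%%% needs, so query I/O is determined by how attributes are grouped.
%%% A tiny Python sketch of this cost model with exhaustive search
%%% over attribute partitions -- feasible only for a handful of
%%% attributes; the paper uses ILP formulations and greedy heuristics
%%% instead, and also weighs the storage overhead of extra
%%% sub-blocks, which is ignored here.
%%%
%%%     def query_io(partition, queries):
%%%         """partition: list of attribute sets; queries: attribute
%%%         sets. Sub-blocks touched, summed over all queries."""
%%%         return sum(sum(1 for block in partition if block & q)
%%%                    for q in queries)
%%%
%%%     def partitions(attrs):
%%%         """All set partitions of `attrs` (exponential)."""
%%%         attrs = list(attrs)
%%%         if not attrs:
%%%             yield []
%%%             return
%%%         first, rest = attrs[0], attrs[1:]
%%%         for p in partitions(rest):
%%%             for i in range(len(p)):
%%%                 yield p[:i] + [p[i] | {first}] + p[i + 1:]
%%%             yield p + [{first}]
%%%
%%%     def best_layout(attrs, queries):
%%%         return min(partitions(attrs),
%%%                    key=lambda p: query_io(p, queries))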

@Article{Yuan:2016:DTK,
  author =       "Long Yuan and Lu Qin and Xuemin Lin and Lijun Chang
                 and Wenjie Zhang",
  title =        "Diversified top-$k$ clique search",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "2",
  pages =        "171--196",
  month =        apr,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0408-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 25 16:34:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Maximal clique enumeration is a fundamental problem in
                 graph theory and has been extensively studied. However,
                 maximal clique enumeration is time-consuming in large
                 graphs and always returns an enormous number of
                 cliques with large overlaps. Motivated by this, in
                 this paper, we study the diversified top-$k$ clique
                 search problem, which is to find top-$k$ cliques that
                 cover the largest number of nodes in the graph.
                 Diversified top-$k$ clique search can be
                 widely used in a lot of applications including
                 community search, motif discovery, and anomaly
                 detection in large graphs. A naive solution for
                 diversified top-$k$ clique search is to keep all
                 maximal cliques in memory and then find $k$ of them
                 that cover the most nodes in the graph by using the
                 approximate greedy max $k$-cover algorithm. However,
                 such a solution is
                 impractical when the graph is large. In this paper,
                 instead of keeping all maximal cliques in memory, we
                 devise an algorithm to maintain $k$ candidates in the
                 process of maximal clique enumeration. Our algorithm
                 has limited memory footprint and can achieve a
                 guaranteed approximation ratio. We also introduce a
                 novel lightweight $\mathsf{PNP}$-$\mathsf{Index}$,
                 based on which we design an optimal
                 maximal clique maintenance algorithm. We further
                 explore three optimization strategies to avoid
                 enumerating all maximal cliques and thus largely reduce
                 the computational cost. Besides, for the massive input
                 graph, we develop an I/O efficient algorithm to tackle
                 the problem when the input graph cannot fit in main
                 memory. We conduct extensive performance studies on
                 real graphs and synthetic graphs. One of the real
                 graphs contains 1.02 billion edges. The results
                 demonstrate the high efficiency and effectiveness of
                 our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
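
%%% The naive solution dismissed above -- materialize all maximal
%%% cliques, then pick $k$ by greedy max $k$-cover -- is easy to
%%% state and is the baseline the paper improves on. A minimal Python
%%% sketch (greedy max cover gives the usual $1 - 1/e$ approximation);
%%% the paper's algorithm instead maintains $k$ candidates during
%%% enumeration without keeping all cliques in memory.
%%%
%%%     def greedy_top_k_cliques(cliques, k):
%%%         """cliques: list of node-id sets; returns up to k of them."""
%%%         covered, chosen = set(), []
%%%         for _ in range(min(k, len(cliques))):
%%%             best = max(cliques, key=lambda c: len(c - covered))
%%%             if not best - covered:
%%%                 break                 # nothing new can be covered
%%%             chosen.append(best)
%%%             covered |= best
%%%         return chosen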

@Article{Pham:2016:ACW,
  author =       "Thao N. Pham and Panos K. Chrysanthis and Alexandros
                 Labrinidis",
  title =        "Avoiding class warfare: managing continuous queries
                 with differentiated classes of service",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "2",
  pages =        "197--221",
  month =        apr,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0411-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 25 16:34:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Data stream management systems (DSMSs) offer the most
                 effective solution for processing data streams by
                 efficiently executing continuous queries (CQs) over the
                 incoming data. CQs inherently have different levels of
                 criticality and hence different levels of expected
                 quality of service (QoS) and quality of data (QoD).
                 Adhering to such expected QoS/QoD metrics is even more
                 important in cases of multi-tenant data stream
                 management services. In this work, we propose DILoS, a
                 framework that, through priority-based scheduling and
                 load shedding, supports differentiated QoS and QoD for
                 multiple classes of CQs. Unlike existing works that
                 consider scheduling and load shedding separately, DILoS
                 is a novel unified framework that exploits the synergy
                 between scheduling and load shedding. We also propose
                 ALoMa, a general, adaptive load manager that DILoS is
                 built upon. By its design, ALoMa performs better than
                 the state-of-the-art alternatives in three dimensions:
                 (1) it automatically tunes the headroom factor, (2) it
                 honors the delay target, and (3) it is applicable to
                 complex query networks with shared operators. We
                 implemented DILoS and ALoMa in our real DSMS prototype
                 system (AQSIOS) and evaluated their performance for a
                 variety of real and synthetic workloads. Our
                 experimental evaluation of ALoMa verified its clear
                 superiority over the state-of-the-art approaches. Our
                 experimental evaluation of the DILoS framework showed
                 that it (a) allows the scheduler and load shedder to
                 consistently honor CQs' priorities, (b) significantly
                 increases system capacity utilization by exploiting
                 batch processing, and (c) enables operator sharing
                 among query classes of different priorities while
                 avoiding priority inversion, i.e., a lower-priority
                 class never blocks a higher-priority one.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Langer:2016:EOD,
  author =       "Philipp Langer and Felix Naumann",
  title =        "Efficient order dependency detection",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "2",
  pages =        "223--241",
  month =        apr,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0412-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 25 16:34:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Order dependencies (ODs) describe a relationship of
                 order between lists of attributes in a relational
                 table. ODs can help to understand the semantics of
                 datasets and the applications producing them. They have
                 applications in the field of query optimization by
                 suggesting query rewrites. Also, the existence of an OD
                 in a table can provide hints on which integrity
                 constraints are valid for the domain of the data at
                 hand. This work is the first to describe the discovery
                 problem for order dependencies in a principled manner
                 by characterizing the search space, developing and
                 proving pruning rules, and presenting the algorithm
                 Order, which finds all order dependencies in a given
                 table. Order traverses the lattice of permutations of
                 attributes in a level-wise bottom-up manner. In a
                 comprehensive evaluation, we show that it is efficient
                 even for various large datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
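
%%% An order dependency states that ordering a table by one attribute
%%% list also orders it by another. Validating a single candidate OD
%%% is straightforward; a quadratic Python sketch over rows as dicts
%%% (the paper's algorithm Order instead discovers all valid ODs by a
%%% level-wise walk over the lattice of attribute permutations):
%%%
%%%     def od_holds(rows, x_attrs, y_attrs):
%%%         """True iff the OD  X orders Y  holds on `rows`."""
%%%         kx = [tuple(r[a] for a in x_attrs) for r in rows]
%%%         ky = [tuple(r[a] for a in y_attrs) for r in rows]
%%%         n = len(rows)
%%%         return all(ky[i] <= ky[j]
%%%                    for i in range(n) for j in range(n)
%%%                    if kx[i] <= kx[j])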

@Article{Peng:2016:PSQ,
  author =       "Peng Peng and Lei Zou and M. Tamer {\"O}zsu and Lei
                 Chen and Dongyan Zhao",
  title =        "Processing {SPARQL} queries over distributed {RDF}
                 graphs",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "2",
  pages =        "243--268",
  month =        apr,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0415-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 25 16:34:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We propose techniques for processing SPARQL queries
                 over a large RDF graph in a distributed environment. We
                 adopt a ``partial evaluation and assembly'' framework.
                 Answering a SPARQL query Q is equivalent to finding
                 subgraph matches of the query graph Q over RDF graph G.
                 Based on properties of subgraph matching over a
                 distributed graph, we introduce local partial match as
                 partial answers in each fragment of RDF graph G. For
                 assembly, we propose two methods: centralized and
                 distributed assembly. We analyze our algorithms both
                 theoretically and experimentally. Extensive
                 experiments over both real and benchmark RDF
                 repositories of billions of triples confirm that our
                 method is superior to the state-of-the-art methods in
                 both the system's performance and scalability.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gao:2016:TCP,
  author =       "Jun Gao and Chang Zhou and Jeffrey Xu Yu",
  title =        "Toward continuous pattern detection over evolving
                 large graph with snapshot isolation",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "2",
  pages =        "269--290",
  month =        apr,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0416-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Mar 25 16:34:05 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper studies continuous pattern detection over
                 large evolving graphs, which plays an important role in
                 monitoring-related applications. The problem is
                 challenging due to the large size and dynamic updates
                 of graphs, the massive search space of pattern
                 detection and inconsistent query results on dynamic
                 graphs. This paper first introduces a snapshot
                 isolation requirement, which ensures that the query
                 results come from a consistent graph snapshot instead
                 of a mixture of partial evolving graphs. Second, we
                 propose an SSD (single-sink directed acyclic graph)
                 plan friendly to vertex-centric distributed graph
                 processing frameworks. The SSD plan can guide the
                 message
                 transformation and transfer among graph vertices, and
                 determine the satisfaction of the pattern on graph
                 vertices for the sink vertex. Third, we devise
                 strategies for major steps in the SSD evaluation,
                 including the location of valid messages to achieve
                 snapshot isolation, AO-List to determine the
                 satisfaction of transition rules over dynamic graphs, and
                 message-on-change policy to reduce outgoing messages.
                 The experiments on billion-edge graphs using Giraph, an
                 open source implementation of Pregel, illustrate the
                 efficiency and effectiveness of our method.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Baumann:2016:BDC,
  author =       "Stephan Baumann and Peter Boncz and Kai-Uwe Sattler",
  title =        "Bitwise dimensional co-clustering for analytical
                 workloads",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "3",
  pages =        "291--316",
  month =        jun,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0417-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue May 24 16:31:54 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Analytical workloads in data warehouses often include
                 heavy joins where queries involve multiple fact tables
                 in addition to the typical star-patterns, dimensional
                 grouping and selections. In this paper we propose a new
                 processing and storage framework called bitwise
                 dimensional co-clustering (BDCC) that avoids
                 replication and thus keeps updates fast, yet is able to
                 accelerate all these foreign key joins, efficiently
                 support grouping and pushes down most dimensional
                 selections. The core idea of BDCC is to cluster each
                 table on a mix of dimensions, each possibly derived
                 from attributes imported over an incoming foreign
                 key, in this way creating foreign-key-connected
                 tables with partially shared clusterings. These are
                 later used to
                 accelerate any join between two tables that have some
                 dimension in common and additionally permit pushing
                 down and propagating selections (reducing I/O) and
                 accelerate aggregation and ordering operations. Besides
                 the general framework, we describe an algorithm to
                 derive such a physical co-clustering database
                 automatically and describe query processing and query
                 optimization techniques that can easily be fitted into
                 existing relational engines. We present an experimental
                 evaluation on the TPC-H benchmark in the Vectorwise
                 system, showing that co-clustering can significantly
                 enhance its already high performance and at the same
                 time significantly reduce the memory consumption of the
                 system.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2016:EAF,
  author =       "Feifei Li and Ke Yi and Yufei Tao and Bin Yao and Yang
                 Li and Dong Xie and Min Wang",
  title =        "Exact and approximate flexible aggregate similarity
                 search",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "3",
  pages =        "317--338",
  month =        jun,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0418-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue May 24 16:31:54 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Aggregate similarity search, also known as aggregate
                 nearest-neighbor (Ann) query, finds many useful
                 applications in spatial and multimedia databases. Given
                 a group $Q$ of $M$ query objects, it retrieves from a
                 database the objects most similar to $Q$, where the
                 similarity is an aggregation (e.g., $\mathrm{sum}$,
                 $\max$) of the distances between each retrieved
                 object $p$ and all the objects in $Q$. In this paper,
                 we propose an added flexibility to the query
                 definition, where the similarity is an aggregation
                 over the distances between $p$ and any subset of
                 $\phi M$ objects in $Q$ for some support $0 < \phi
                 \le 1$. We call this new definition flexible
                 aggregate similarity search and accordingly refer to
                 a query as a flexible aggregate nearest-neighbor
                 (FANN) query. We present algorithms for answering
                 FANN queries exactly and approximately.
                 Our approximation algorithms are especially appealing,
                 which are simple, highly efficient, and work well in
                 both low and high dimensions. They also return
                 near-optimal answers with guaranteed constant-factor
                 approximations in any dimension. Extensive experiments
                 on large real and synthetic datasets from 2 to 74
                 dimensions have demonstrated their superior efficiency
                 and high quality.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
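
%%% The flexible aggregate similarity query defined above scores a
%%% database object $p$ by aggregating its distances to the best
%%% $\lceil \phi M \rceil$ of the $M$ query objects; for monotone
%%% aggregates such as sum and max, the best subset is simply the
%%% $\lceil \phi M \rceil$ nearest query objects. A brute-force
%%% Python sketch, purely illustrative next to the paper's exact and
%%% approximate algorithms:
%%%
%%%     import math
%%%
%%%     def fann(db, Q, phi, agg=sum):
%%%         """db, Q: lists of coordinate tuples; 0 < phi <= 1."""
%%%         m = max(1, math.ceil(phi * len(Q)))
%%%         def score(p):
%%%             return agg(sorted(math.dist(p, q) for q in Q)[:m])
%%%         return min(db, key=score)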

@Article{Guzun:2016:HQO,
  author =       "Gheorghi Guzun and Guadalupe Canahuate",
  title =        "Hybrid query optimization for hard-to-compress
                 bit-vectors",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "3",
  pages =        "339--354",
  month =        jun,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0419-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue May 24 16:31:54 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Bit-vectors are widely used for indexing and
                 summarizing data due to their efficient processing in
                 modern computers. Sparse bit-vectors can be further
                 compressed to reduce their space requirement. Special
                 compression schemes based on run-length encoders have
                 been designed to avoid explicit decompression and
                 minimize the decoding overhead during query execution.
                 Moreover, highly compressed bit-vectors can exhibit a
                 faster query time than the non-compressed ones.
                 However, for hard-to-compress bit-vectors, compression
                 does not speed up queries and can add considerable
                 overhead. In these cases, bit-vectors are often stored
                 verbatim (non-compressed). On the other hand, queries
                 are answered by executing a cascade of bit-wise
                 operations involving indexed bit-vectors and
                 intermediate results. Often, even when the original
                 bit-vectors are hard to compress, the intermediate
                 results become sparse. It could be feasible to improve
                 query performance by compressing these bit-vectors as
                 the query is executed. In this scenario, it would be
                 necessary to operate verbatim and compressed
                 bit-vectors together. In this paper, we propose a
                 hybrid framework where compressed and verbatim bitmaps
                 can coexist and design algorithms to execute queries
                 under this hybrid model. Our query optimizer is able to
                 decide at run time when to compress or decompress a
                 bit-vector. Our heuristics show that the applications
                 using higher-density bitmaps can benefit from using
                 this hybrid model, improving both their query time and
                 memory utilization.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
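
%%% The hybrid model above lets verbatim and compressed bit-vectors
%%% take part in the same query, with a run-time decision about the
%%% format of each intermediate result. A minimal Python sketch using
%%% an int as the verbatim bitset and (start, length) runs of set
%%% bits as the compressed form; the density threshold is an assumed
%%% knob, not a value from the paper.
%%%
%%%     DENSITY_THRESHOLD = 0.05          # assumed tuning knob
%%%
%%%     def runs_to_int(runs):
%%%         x = 0
%%%         for start, length in runs:
%%%             x |= ((1 << length) - 1) << start
%%%         return x
%%%
%%%     def compress(x):
%%%         """int bitset -> sorted list of (start, length) runs."""
%%%         runs, start, bit = [], None, 0
%%%         while x >> bit:
%%%             if (x >> bit) & 1:
%%%                 if start is None:
%%%                     start = bit
%%%             elif start is not None:
%%%                 runs.append((start, bit - start))
%%%                 start = None
%%%             bit += 1
%%%         if start is not None:
%%%             runs.append((start, bit - start))
%%%         return runs
%%%
%%%     def and_hybrid(verbatim, runs, nbits):
%%%         """AND a verbatim and a compressed bitmap; choose the
%%%         output format by the density of the result."""
%%%         result = verbatim & runs_to_int(runs)
%%%         if bin(result).count("1") <= DENSITY_THRESHOLD * nbits:
%%%             return compress(result)   # sparse: keep it compressed
%%%         return result                 # dense: keep it verbatim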

@Article{Harbi:2016:ASQ,
  author =       "Razen Harbi and Ibrahim Abdelaziz and Panos Kalnis and
                 Nikos Mamoulis and Yasser Ebrahim and Majed Sahli",
  title =        "Accelerating {SPARQL} queries by exploiting hash-based
                 locality and adaptive partitioning",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "3",
  pages =        "355--380",
  month =        jun,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0420-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue May 24 16:31:54 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "State-of-the-art distributed RDF systems partition
                 data across multiple computer nodes (workers). Some
                 systems perform cheap hash partitioning, which may
                 result in expensive query evaluation. Others try to
                 minimize inter-node communication, which requires an
                 expensive data preprocessing phase, leading to a high
                 startup cost. A priori knowledge of the query workload
                 has also been used to create partitions, which,
                 however, are static and do not adapt to workload
                 changes. In this paper, we propose AdPart, a
                 distributed RDF system, which addresses the
                 shortcomings of previous work. First, AdPart applies
                 lightweight partitioning on the initial data, which
                 distributes triples by hashing on their subjects; this
                 renders its startup overhead low. At the same time, the
                 locality-aware query optimizer of AdPart takes full
                 advantage of the partitioning to (1) support the fully
                 parallel processing of join patterns on subjects and
                 (2) minimize data communication for general queries by
                 applying hash distribution of intermediate results
                 instead of broadcasting, wherever possible. Second,
                 AdPart monitors the data access patterns and
                 dynamically redistributes and replicates the instances
                 of the most frequent ones among workers. As a result,
                 the communication cost for future queries is
                 drastically reduced or even eliminated. To control
                 replication, AdPart implements an eviction policy for
                 the redistributed patterns. Our experiments with
                 synthetic and real data verify that AdPart: (1) starts
                 faster than all existing systems; (2) processes
                 thousands of queries before other systems become
                 online; and (3) gracefully adapts to the query load,
                 being able to evaluate queries on billion-scale RDF
                 data in subseconds.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
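
%%% The lightweight initial partitioning described above just hashes
%%% each triple on its subject, so all triples of a subject are
%%% colocated and subject-rooted star joins run without
%%% communication. A minimal Python sketch (a real deployment would
%%% use a deterministic hash rather than Python's per-process
%%% hash()):
%%%
%%%     def partition_triples(triples, num_workers):
%%%         """triples: iterable of (subject, predicate, object)."""
%%%         workers = [[] for _ in range(num_workers)]
%%%         for s, p, o in triples:
%%%             workers[hash(s) % num_workers].append((s, p, o))
%%%         return workers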

@Article{Bonifati:2016:MEO,
  author =       "Angela Bonifati and Werner Nutt and Riccardo Torlone
                 and Jan {Van Den Bussche}",
  title =        "Mapping-equivalence and oid-equivalence of
                 single-function object-creating conjunctive queries",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "3",
  pages =        "381--397",
  month =        jun,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0421-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue May 24 16:31:54 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Conjunctive database queries have been extended with a
                 mechanism for object creation to capture important
                 applications such as data exchange, data integration,
                 and ontology-based data access. Object creation
                 generates new object identifiers in the result that do
                 not belong to the set of constants in the source
                 database. The new object identifiers can be also seen
                 as Skolem terms. Hence, object-creating conjunctive
                 queries can also be regarded as restricted second-order
                 tuple-generating dependencies (SO-tgds), considered in
                 the data exchange literature. In this paper, we focus
                 on the class of single-function object-creating
                 conjunctive queries, or sifo CQs for short. The
                 single-function symbol can be used only once in the
                 head of the query. We give a new characterization for
                 oid-equivalence of sifo CQs that is simpler than the
                 one given by Hull and Yoshikawa and places the problem
                 in the complexity class NP. Our characterization is
                 based on Cohen's equivalence notions for conjunctive
                 queries with multiplicities. We also solve the logical
                 entailment problem for sifo CQs, showing that this
                 problem also belongs to NP. Results by Pichler et al.
                 have
                 shown that logical equivalence for more general classes
                 of SO-tgds is either undecidable or decidable with as
                 yet unknown complexity upper bounds.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lu:2016:DCE,
  author =       "Yue Lu and Yuguan Li and Mohamed Y. Eltabakh",
  title =        "Decorating the cloud: enabling annotation management
                 in {MapReduce}",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "3",
  pages =        "399--424",
  month =        jun,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0422-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue May 24 16:31:54 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Data curation and annotation are indispensable
                 mechanisms to a wide range of applications for
                 capturing various types of metadata information. This
                 metadata not only increases the data's credibility and
                 merit, and allows end users and applications to make
                 more informed decisions, but also enables advanced
                 processing over the data that is not feasible
                 otherwise. That is why annotation management has been
                 extensively studied in the context of scientific
                 repositories, web documents, and relational database
                 systems. In this paper, we make the case that
                 cloud-based applications that rely on the emerging
                 Hadoop infrastructure are also in need of data
                 curation and annotation and that the presence of such
                 mechanisms in Hadoop would bring value-added
                 capabilities to these applications. We propose the
                 ``CloudNotes'' system, a full-fledged MapReduce-based
                 annotation management engine. CloudNotes addresses
                 several new challenges to annotation management
                 including: (1) scalable and distributed processing of
                 annotations over large clusters, (2) propagation of
                 annotations under the MapReduce's blackbox execution
                 model, and (3) annotation-driven optimizations
                 ranging from proactive prefetching and colocation of
                 annotations to annotation-aware task scheduling,
                 novel shared execution strategies among the
                 annotation jobs, and concurrency control mechanisms
                 for annotation management. These challenges have not
                 been addressed or explored before by the
                 state-of-the-art technologies.
                 CloudNotes is built on top of the open-source
                 Hadoop/HDFS infrastructure and experimentally evaluated
                 to demonstrate the practicality and scalability of its
                 features, and the effectiveness of its optimizations
                 under large workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sariyuce:2016:IKC,
  author =       "Ahmet Erdem Sariy{\"u}ce and Bugra Gedik and Gabriela
                 Jacques-Silva and Kun-Lung Wu and {\"U}mit V.
                 {\c{C}}ataly{\"u}rek",
  title =        "Incremental $k$-core decomposition: algorithms and
                 evaluation",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "3",
  pages =        "425--447",
  month =        jun,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0423-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue May 24 16:31:54 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A $k$-core of a graph is a maximal connected subgraph
                 in which every vertex is connected to at least k
                 vertices in the subgraph. $k$-core decomposition is
                 often used in large-scale network analysis, such as
                 community detection, protein function prediction,
                 visualization, and solving NP-hard problems on real
                 networks efficiently, like maximal clique finding. In
                 many real-world applications, networks change over
                 time. As a result, it is essential to develop efficient
                 incremental algorithms for dynamic graph data. In this
                 paper, we propose a suite of incremental $k$-core
                 decomposition algorithms for dynamic graph data. These
                 algorithms locate a small subgraph that is guaranteed
                 to contain the list of vertices whose maximum $k$-core
                 values have changed and efficiently process this
                 subgraph to update the $k$-core decomposition. We
                 present incremental algorithms for both insertion and
                 deletion operations, and propose auxiliary vertex state
                 maintenance techniques that can further accelerate
                 these operations. Our results show a significant
                 reduction in runtime compared to non-incremental
                 alternatives. We illustrate the efficiency of our
                 algorithms on different types of real and synthetic
                 graphs, at varying scales. For a graph of 16 million
                 vertices, we observe relative throughputs reaching a
                 factor of a million over the non-incremental
                 algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
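
%%% The non-incremental baseline these algorithms are measured
%%% against is the classic peeling computation: repeatedly remove a
%%% vertex of minimum remaining degree; a vertex's core number is the
%%% largest minimum degree seen up to its removal. A minimal Python
%%% sketch with a lazy-deletion heap; the paper's contribution is
%%% updating these core numbers under edge insertions and deletions
%%% without recomputing from scratch.
%%%
%%%     import heapq
%%%
%%%     def core_numbers(adj):
%%%         """adj: dict vertex -> set of neighbors (undirected)."""
%%%         deg = {v: len(ns) for v, ns in adj.items()}
%%%         heap = [(d, v) for v, d in deg.items()]
%%%         heapq.heapify(heap)
%%%         removed, core, k = set(), {}, 0
%%%         while heap:
%%%             d, v = heapq.heappop(heap)
%%%             if v in removed or d != deg[v]:
%%%                 continue              # stale heap entry
%%%             k = max(k, d)             # core numbers never decrease
%%%             core[v] = k
%%%             removed.add(v)
%%%             for w in adj[v]:
%%%                 if w not in removed:
%%%                     deg[w] -= 1
%%%                     heapq.heappush(heap, (deg[w], w))
%%%         return core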

@Article{Luo:2016:QDS,
  author =       "Ge Luo and Lu Wang and Ke Yi and Graham Cormode",
  title =        "Quantiles over data streams: experimental comparisons,
                 new analyses, and further improvements",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "4",
  pages =        "449--472",
  month =        aug,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0424-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 21 06:41:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A fundamental problem in data management and analysis
                 is to generate descriptions of the distribution of
                 data. It is most common to give such descriptions in
                 terms of the cumulative distribution, which is
                 characterized by the quantiles of the data. The design
                 and engineering of efficient methods to find these
                 quantiles has attracted much study, especially in the
                 case where the data are given incrementally, and we
                 must compute the quantiles in an online, streaming
                 fashion. While such algorithms have proved to be
                 extremely useful in practice, there has been limited
                 formal comparison of the competing methods, and no
                 comprehensive study of their performance. In this
                 paper, we remedy this deficit by providing a taxonomy
                 of different methods and describing efficient
                 implementations. In doing so, we propose new variants
                 that have not been studied before, yet which outperform
                 existing methods. To illustrate this, we provide
                 detailed experimental comparisons demonstrating the
                 trade-offs between space, time, and accuracy for
                 quantile computation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
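
%%% A useful reference point for the streaming-quantile algorithms
%%% compared above is the simplest randomized approach: keep a
%%% uniform reservoir sample and read quantiles off the sorted
%%% sample. A minimal Python sketch of that baseline -- not one of
%%% the deterministic sketches the paper surveys:
%%%
%%%     import random
%%%
%%%     class ReservoirQuantiles:
%%%         def __init__(self, capacity, seed=None):
%%%             self.capacity, self.n = capacity, 0
%%%             self.sample = []
%%%             self.rng = random.Random(seed)
%%%
%%%         def add(self, x):
%%%             """Standard reservoir sampling over the stream."""
%%%             self.n += 1
%%%             if len(self.sample) < self.capacity:
%%%                 self.sample.append(x)
%%%             else:
%%%                 j = self.rng.randrange(self.n)
%%%                 if j < self.capacity:
%%%                     self.sample[j] = x
%%%
%%%         def quantile(self, q):
%%%             """Approximate q-quantile, 0 <= q <= 1."""
%%%             s = sorted(self.sample)
%%%             return s[min(int(q * len(s)), len(s) - 1)]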

@Article{Xie:2016:EEI,
  author =       "Xike Xie and Benjin Mei and Jinchuan Chen and Xiaoyong
                 Du and Christian S. Jensen",
  title =        "{Elite}: an elastic infrastructure for big
                 spatiotemporal trajectories",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "4",
  pages =        "473--493",
  month =        aug,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0425-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 21 06:41:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "As the volumes of spatiotemporal trajectory data
                 continue to grow at a rapid pace, a new generation of
                 data management techniques is needed in order to be
                 able to utilize these data to provide a range of
                 data-driven services, including geographic-type
                 services. Key challenges posed by spatiotemporal data
                 include the massive data volumes, the high velocity
                 with which the data are captured, the need for
                 interactive response times, and the inherent inaccuracy
                 of the data. We propose an infrastructure, Elite, that
                 leverages peer-to-peer and parallel computing
                 techniques to address these challenges. The
                 infrastructure offers efficient, parallel update and
                 query processing by organizing the data into a layered
                 index structure that is logically centralized, but
                 physically distributed among computing nodes. The
                 infrastructure is elastic with respect to storage,
                 meaning that it adapts to fluctuations in the storage
                 volume, and with respect to computation, meaning that
                 the degree of parallelism can be adapted to best match
                 the computational requirements. Further, the
                 infrastructure offers advanced functionality, including
                 probabilistic simulations, for contending with the
                 inaccuracy of the underlying data in query processing.
                 Extensive empirical studies offer insight into
                 properties of the infrastructure and indicate that it
                 meets its design goals, thus enabling the effective
                 management of big spatiotemporal data.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kanza:2016:ESF,
  author =       "Yaron Kanza and Hadas Yaari",
  title =        "External sorting on flash storage: reducing cell
                 wearing and increasing efficiency by avoiding
                 intermediate writes",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "4",
  pages =        "495--518",
  month =        aug,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0426-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 21 06:41:51 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper studies the problem of how to conduct
                 external sorting on flash drives while avoiding
                 intermediate writes to the disk. The focus is on sort
                 in portable electronic devices, where relations are
                 only larger than the main memory by a small factor, and
                 on sort as part of distributed processes where
                 relations are frequently partially sorted. In such
                 cases, sort algorithms that refrain from writing
                 intermediate results to the disk have three advantages
                 over algorithms that perform intermediate writes.
                 First, on devices in which read operations are much
                 faster than writes, such methods are efficient and
                 frequently outperform Merge Sort. Second, they reduce
                 flash cell degradation caused by writes. Third, they
                 can be used in cases where there is not enough disk
                 space for the intermediate results. Novel sort
                 algorithms that avoid intermediate writes to the disk
                 are presented. An experimental evaluation, on different
                 flash storage devices, shows that in many cases the new
                 algorithms can extend the lifespan of the devices by
                 avoiding unnecessary writes to the disk, while
                 maintaining efficiency, in comparison with Merge
                 Sort.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
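
%%% The write-avoiding strategy above can be illustrated by a
%%% selection-based external sort: instead of writing sorted runs and
%%% merging them, re-scan the read-only input once per output chunk
%%% and emit the next `memory` smallest keys above the last key
%%% already written, so only the final result is ever written. A
%%% Python sketch assuming distinct numeric keys (ties need a
%%% tie-break in a real implementation); this illustrates the idea,
%%% not the paper's exact algorithms.
%%%
%%%     import heapq
%%%
%%%     def sort_without_intermediate_writes(scan, emit, memory):
%%%         """scan(): fresh iterator over the input; emit(x): append
%%%         x to the final output."""
%%%         last = None
%%%         while True:
%%%             heap = []                 # max-heap via negated keys
%%%             for x in scan():
%%%                 if last is not None and x <= last:
%%%                     continue          # written in an earlier pass
%%%                 if len(heap) < memory:
%%%                     heapq.heappush(heap, -x)
%%%                 elif x < -heap[0]:
%%%                     heapq.heapreplace(heap, -x)
%%%             if not heap:
%%%                 return
%%%             batch = sorted(-k for k in heap)
%%%             for x in batch:
%%%                 emit(x)
%%%             last = batch[-1]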

@Article{Jeon:2016:MBS,
  author =       "Inah Jeon and Evangelos E. Papalexakis and Christos
                 Faloutsos and Lee Sael and U. Kang",
  title =        "Mining billion-scale tensors: algorithms and
                 discoveries",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "4",
  pages =        "519--544",
  month =        aug,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0427-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 21 06:45:26 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "How can we analyze large-scale real-world data with
                 various attributes? Many real-world data (e.g., network
                 traffic logs, web data, social networks, knowledge
                 bases, and sensor streams) with multiple attributes are
                 represented as multi-dimensional arrays, called
                 tensors. For analyzing a tensor, tensor decompositions
                 are widely used in many data mining applications:
                 detecting malicious attackers in network traffic logs
                 (with source IP, destination IP, port-number,
                 timestamp), finding telemarketers in a phone call
                 history (with sender, receiver, date), and identifying
                 interesting concepts in a knowledge base (with subject,
                 object, relation). However, current tensor
                 decomposition methods do not scale to large and sparse
                 real-world tensors with millions of rows and columns
                 and `fibers.' In this paper, we propose HaTen2, a
                 distributed method for large-scale tensor
                 decompositions that runs on the MapReduce framework.
                 Our careful design and implementation of HaTen2
                 dramatically reduce the size of intermediate data and
                 the number of jobs, achieving high scalability
                 compared with the state-of-the-art method. Thanks to
                 HaTen2, we analyze big real-world sparse tensors that
                 cannot be handled by the current state of the art, and
                 discover hidden concepts.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Islam:2016:KYC,
  author =       "Md. Saiful Islam and Chengfei Liu",
  title =        "Know your customer: computing $k$-most promising
                 products for targeted marketing",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "4",
  pages =        "545--570",
  month =        aug,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0428-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 21 06:45:26 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The advancement of World Wide Web has revolutionized
                 the way the manufacturers can do business. The
                 manufacturers can collect customer preferences for
                 products and product features from their sales and
                 other product-related Web sites to enter and remain
                 competitive in the global market. For example, the
                 manufacturers can make intelligent use of these
                 customer preference data
                 to decide on which products should be selected for
                 targeted marketing. However, the selected products must
                 attract as many customers as possible to increase the
                 possibility of selling more than their respective
                 competitors. This paper addresses this kind of product
                 selection problem. That is, given a database of
                 existing products P from the competitors, a set of
                 company's own products Q, a dataset C of customer
                 preferences and a positive integer k, we want to find
                 k-most promising products (k-MPP) from Q with maximum
                 expected number of total customers for targeted
                 marketing. We model k-MPP query and propose an
                 algorithmic framework for processing such query and its
                 variants. Our framework utilizes grid-based data
                 partitioning scheme and parallel computing techniques
                 to realize k-MPP query. The effectiveness and
                 efficiency of the framework are demonstrated by
                 conducting extensive experiments with real and
                 synthetic datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
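%%%
%%% The k-MPP definition above admits a tiny brute-force reference
%%% implementation.  The sketch below assumes a deliberately
%%% simplified preference model -- each customer buys the single
%%% closest product -- and enumerates all size-k subsets; the
%%% paper's expected-customer model, grid partitioning, and
%%% parallelism are not reproduced:
%%%
%%%     import itertools
%%%     import numpy as np
%%%
%%%     def k_mpp_bruteforce(P, Q, C, k):
%%%         # P: competitors' products, Q: own candidates,
%%%         # C: customer preference points (rows = feature vectors)
%%%         P, Q, C = np.asarray(P), np.asarray(Q), np.asarray(C)
%%%         best, best_won = None, -1
%%%         for sub in itertools.combinations(range(len(Q)), k):
%%%             offer = np.vstack([P, Q[list(sub)]])
%%%             d = np.linalg.norm(C[:, None, :] - offer[None, :, :],
%%%                                axis=2)
%%%             # customers whose nearest product is one of ours
%%%             won = int(np.sum(d.argmin(axis=1) >= len(P)))
%%%             if won > best_won:
%%%                 best, best_won = sub, won
%%%         return best, best_won
%%%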

@Article{Kohler:2016:PCK,
  author =       "Henning K{\"o}hler and Uwe Leck and Sebastian Link and
                 Xiaofang Zhou",
  title =        "Possible and certain keys for {SQL}",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "4",
  pages =        "571--596",
  month =        aug,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0430-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 21 06:45:26 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Driven by the dominance of the relational model and
                 the requirements of modern applications, we revisit the
                 fundamental notion of a key in relational databases
                 with NULL. In SQL, primary key columns are NOT NULL,
                 and UNIQUE constraints guarantee uniqueness only for
                 tuples without NULL. We investigate the notions of
                 possible and certain keys, which are keys that hold in
                 some or all possible worlds that originate from an SQL
                 table, respectively. Possible keys coincide with
                 UNIQUE, thus providing a semantics for their syntactic
                 definition in the SQL standard. Certain keys extend
                 primary keys to include NULL columns and can uniquely
                 identify entities whenever feasible, while primary keys
                 may not. In addition to basic characterization,
                 axiomatization, discovery, and extremal combinatorics
                 problems, we investigate the existence and construction
                 of Armstrong tables, and describe an indexing scheme
                 for enforcing certain keys. Our experiments show that
                 certain keys with NULLs occur in real-world data, and
                 related computational problems can be solved
                 efficiently. Certain keys are therefore semantically
                 well founded and able to meet Codd's entity integrity
                 rule while handling high volumes of incomplete data
                 from different formats.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
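%%%
%%% The abstract's characterizations of possible and certain keys
%%% translate directly into pairwise checks.  A minimal sketch with
%%% tuples as dicts and NULL as Python None, assuming an infinite
%%% domain so every NULL could be replaced by a fresh value:
%%%
%%%     NULL = None
%%%
%%%     def compatible(t1, t2, key):
%%%         # t1, t2 can agree on every key attribute in SOME world:
%%%         # values equal, or at least one of them is NULL
%%%         return all(t1[a] == t2[a] or NULL in (t1[a], t2[a])
%%%                    for a in key)
%%%
%%%     def is_possible_key(table, key):
%%%         # holds in SOME possible world; fresh values distinguish
%%%         # any pair with a NULL, so only pairs that already agree
%%%         # on non-NULL values are fatal -- exactly SQL's UNIQUE
%%%         return not any(all(t1[a] == t2[a] != NULL for a in key)
%%%                        for i, t1 in enumerate(table)
%%%                        for t2 in table[i + 1:])
%%%
%%%     def is_certain_key(table, key):
%%%         # holds in ALL possible worlds: every pair must differ on
%%%         # some key attribute with two non-NULL, distinct values
%%%         return not any(compatible(t1, t2, key)
%%%                        for i, t1 in enumerate(table)
%%%                        for t2 in table[i + 1:])
%%%
%%%     t = [{"ssn": "123", "nm": "Ann"}, {"ssn": NULL, "nm": "Bob"}]
%%%     assert is_possible_key(t, ("ssn",))     # UNIQUE accepts it
%%%     assert not is_certain_key(t, ("ssn",))  # NULL may equal "123"
%%%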

@Article{Mottin:2016:HPA,
  author =       "Davide Mottin and Alice Marascu and Senjuti Basu Roy
                 and Gautam Das and Themis Palpanas and Yannis
                 Velegrakis",
  title =        "A holistic and principled approach for the
                 empty-answer problem",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "4",
  pages =        "597--622",
  month =        aug,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0431-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 21 06:45:26 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We propose a principled optimization-based interactive
                 query relaxation framework for queries that return no
                 answers. Given an initial query that returns an
                 empty-answer set, our framework dynamically computes
                 and suggests alternative queries with fewer conditions
                 than those the user has initially requested, in order
                 to help the user arrive at a query with a
                 non-empty answer, or at a query for which, no matter how
                 many additional conditions are ignored, the answer will
                 still be empty. Our proposed approach for suggesting
                 query relaxations is driven by a novel probabilistic
                 framework based on optimizing a wide variety of
                 application-dependent objective functions. We describe
                 optimal and approximate solutions of different
                 optimization problems using the framework. Moreover, we
                 discuss two important extensions to the base framework:
                 the specification of a lower bound on the number of
                 results returned by a relaxed query and the possibility
                 of proposing multiple conditions at the same time. We
                 analyze the proposed solutions, experimentally verify
                 their efficiency and effectiveness, and illustrate
                 their advantages over the existing approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
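%%%
%%% A toy version of the interactive relaxation loop above: while the
%%% conjunctive query is empty, drop the condition whose removal
%%% recovers the most answers.  The paper ranks candidate relaxations
%%% with a probabilistic, preference-aware objective; plain result
%%% size stands in for it here:
%%%
%%%     def relax_until_nonempty(data, conditions):
%%%         # conditions: predicates over a tuple
%%%         conds = list(conditions)
%%%         ans = lambda cs: [t for t in data
%%%                           if all(c(t) for c in cs)]
%%%         while conds and not ans(conds):
%%%             _, drop = max(
%%%                 (len(ans(conds[:i] + conds[i + 1:])), i)
%%%                 for i in range(len(conds)))
%%%             conds.pop(drop)
%%%         return conds, ans(conds)
%%%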

@Article{Boncz:2016:SIM,
  author =       "Peter Boncz and Wolfgang Lehner and Thomas Neumann",
  title =        "Special Issue: Modern Hardware",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "5",
  pages =        "623--624",
  month =        oct,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0440-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Sep 12 18:50:32 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Porobic:2016:CIH,
  author =       "Danica Porobic and Ippokratis Pandis and Miguel Branco
                 and Pinar T{\"o}z{\"u}n and Anastasia Ailamaki",
  title =        "Characterization of the Impact of Hardware Islands on
                 {OLTP}",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "5",
  pages =        "625--650",
  month =        oct,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0413-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Sep 12 18:50:32 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Modern hardware is abundantly parallel and
                 increasingly heterogeneous. The numerous processing
                 cores have non-uniform access latencies to the main
                 memory and processor caches, which causes variability
                 in the communication costs. Unfortunately, database
                 systems mostly assume that all processing cores are the
                 same and that microarchitecture differences are not
                 significant enough to appear in critical database
                 execution paths. As we demonstrate in this paper,
                 however, non-uniform core topology does appear in the
                 critical path and conventional database architectures
                 achieve suboptimal and, even worse, unpredictable
                 performance. We perform a detailed performance analysis
                 of OLTP deployments in servers with multiple cores per
                 CPU (multicore) and multiple CPUs per server
                 (multisocket). We compare different database deployment
                 strategies where we vary the number and size of
                 independent database instances running on a single
                 server, from a single shared-everything instance to
                 fine-grained shared-nothing configurations. We quantify
                 the impact of non-uniform hardware on various
                 deployments by (a) examining how efficiently each
                 deployment uses the available hardware resources and
                 (b) measuring the impact of distributed transactions
                 and skewed requests on different workloads. We show
                 that no strategy is optimal for all cases and that the
                 best choice depends on the combination of hardware
                 topology and workload characteristics. Finally, we
                 argue that transaction processing systems must be aware
                 of the hardware topology in order to achieve
                 predictably high performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sadoghi:2016:ESO,
  author =       "Mohammad Sadoghi and Kenneth A. Ross and Mustafa Canim
                 and Bishwaranjan Bhattacharjee",
  title =        "Exploiting {SSDs} in operational multiversion
                 databases",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "5",
  pages =        "651--672",
  month =        oct,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0410-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Sep 12 18:50:32 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Multiversion databases store both current and
                 historical data. Rows are typically annotated with
                 timestamps representing the period when the row is/was
                 valid. We develop novel techniques to reduce index
                 maintenance in multiversion databases, so that indexes
                 can be used effectively for analytical queries over
                 current data without being a heavy burden on
                 transaction throughput. To achieve this end, we
                 re-design persistent index data structures in the
                 storage hierarchy to employ an extra level of
                 indirection. The indirection level is stored on
                 solid-state disks that can support very fast random
                 I/Os, so that traversing the extra level of indirection
                 incurs a relatively small overhead. The extra level of
                 indirection dramatically reduces the number of magnetic
                 disk I/Os that are needed for index updates and
                 localizes maintenance to indexes on updated attributes.
                 Additionally, we batch insertions within the
                 indirection layer in order to reduce physical disk I/Os
                 for indexing new records. In this work, we further
                 exploit SSDs by introducing novel DeltaBlock techniques
                 for storing the recent changes to data on SSDs. Using
                 our DeltaBlock, we propose an efficient method to
                 periodically flush the recently changed data from SSDs
                 to HDDs such that, on the one hand, we keep track of
                 every change (or delta) for every record, and, on the
                 other hand, we avoid redundantly storing the unchanged
                 portion of updated records. By reducing the index
                 maintenance overhead on transactions, we enable
                 operational data stores to create more indexes to
                 support queries. We have developed a prototype of our
                 indirection proposal by extending the widely used
                 generalized search tree open-source project, which is
                 also employed in PostgreSQL. Our working implementation
                 demonstrates that we can significantly reduce index
                 maintenance and/or query processing cost by a factor of
                 3. For the insertion of new records, our novel batching
                 technique can save up to 90 \% of the insertion time.
                 For updates, our prototype demonstrates that we can
                 significantly reduce the database size by up to 80 \%
                 even with a modest space allocated for DeltaBlocks on
                 SSDs.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
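%%%
%%% The core of the indirection idea above fits in a few lines:
%%% indexes store stable logical IDs, and only a small SSD-resident
%%% table maps a logical ID to the current physical location, so an
%%% update touches that table instead of every index.  A schematic
%%% sketch (a dict stands in for the SSD table; names invented):
%%%
%%%     class IndirectionLayer:
%%%         def __init__(self):
%%%             self.lid_to_rid = {}        # SSD-resident mapping
%%%
%%%         def install_version(self, lid, rid):
%%%             self.lid_to_rid[lid] = rid  # one small SSD write
%%%
%%%         def resolve(self, lid):
%%%             return self.lid_to_rid[lid] # one fast SSD read
%%%
%%%     index = {"smith": 42}               # index entry -> LID
%%%     layer = IndirectionLayer()
%%%     layer.install_version(42, ("page 17", "slot 3"))
%%%     # record moves on update; indexes on unchanged attributes
%%%     # stay untouched
%%%     layer.install_version(42, ("page 90", "slot 1"))
%%%     assert layer.resolve(index["smith"]) == ("page 90", "slot 1")
%%%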

@Article{Kang:2016:FCE,
  author =       "Woon-Hak Kang and Sang-Won Lee and Bongki Moon",
  title =        "Flash as cache extension for online transactional
                 workloads",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "5",
  pages =        "673--694",
  month =        oct,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0414-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Sep 12 18:50:32 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Considering the current price gap between hard disk
                 and flash memory SSD storages, for applications dealing
                 with large-scale data, it will be economically more
                 sensible to use flash memory drives to supplement disk
                 drives rather than to replace them. This paper presents
                 FaCE, which is a new low-overhead caching strategy that
                 uses flash memory as an extension to the RAM buffer of
                 database systems. FaCE aims at improving the
                 transaction throughput as well as shortening the
                 recovery time from a system failure. To achieve the
                 goals, we propose two novel algorithms for flash cache
                 management, namely multi-version FIFO replacement and
                 group second chance. These gains are made possible by
                 flash write optimization as well as the disk access
                 reduction obtained by the FaCE caching methods. In
                 addition, FaCE
                 takes advantage of the nonvolatility of flash memory to
                 fully support database recovery by extending the scope
                 of a persistent database to include the data pages
                 stored in the flash cache. We have implemented FaCE in
                 the PostgreSQL open-source database server and
                 demonstrated its effectiveness for TPC-C benchmarks in
                 comparison with existing caching methods such as Lazy
                 Cleaning and Linux Bcache.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
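%%%
%%% A schematic of the multi-version FIFO replacement named above,
%%% assuming version numbers increase per page (details and the group
%%% second chance variant are in the paper; this is not its code):
%%%
%%%     from collections import deque
%%%
%%%     class MvFifoFlashCache:
%%%         # pages evicted from RAM are appended to flash (cheap
%%%         # sequential writes, duplicates allowed); on flash
%%%         # eviction a copy goes to disk only if it is still the
%%%         # newest cached version of its page
%%%         def __init__(self, capacity, disk):
%%%             self.capacity, self.disk = capacity, disk
%%%             self.fifo = deque()   # (page_id, version, data)
%%%             self.newest = {}      # page_id -> newest version
%%%
%%%         def stage(self, pid, ver, data):
%%%             self.fifo.append((pid, ver, data))
%%%             self.newest[pid] = max(ver,
%%%                                    self.newest.get(pid, -1))
%%%             while len(self.fifo) > self.capacity:
%%%                 p, v, d = self.fifo.popleft()
%%%                 if v == self.newest[p]:   # stale copies: dropped
%%%                     self.disk[p] = d
%%%                     del self.newest[p]
%%%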

@Article{Jin:2016:RWO,
  author =       "Peiquan Jin and Chengcheng Yang and Christian S.
                 Jensen and Puyuan Yang and Lihua Yue",
  title =        "Read\slash write-optimized tree indexing for
                 solid-state drives",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "5",
  pages =        "695--717",
  month =        oct,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0406-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Sep 12 18:50:32 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Flash-memory-based solid-state drives (SSDs) are used
                 widely for secondary storage. To be effective for SSDs,
                 traditional indices have to be redesigned to cope with
                 the special properties of flash memory, such as
                 asymmetric read/write latencies (fast reads and slow
                 writes) and out-of-place updates. Previous
                 flash-optimized indices focus mainly on reducing random
                 writes to SSDs, which is typically accomplished at the
                 expense of a substantial number of extra reads.
                 However, modern SSDs show a narrowing gap between read
                 and write speeds, and read operations on SSDs
                 increasingly affect the overall performance of indices
                 on SSDs. As a consequence, how to optimize SSD-aware
                 indices by reducing both write and read costs is a
                 pertinent and open challenge. We propose a new tree
                 index for SSDs that is able to reduce both writes and
                 extra reads. In particular, we use an update buffer and
                 overflow pages to reduce random writes, and we further
                 exploit Bloom filters to reduce the extra reads to the
                 overflow nodes in the tree. With this mechanism, we
                 construct a read/write-optimized index that is capable
                 of offering better overall performance than previous
                 flash-aware indices. In addition, we present an
                 analysis of the proposed index and show that the read
                 and write costs of the operations on the index can be
                 balanced by only tuning the false-positive rate of the
                 Bloom filters. Our experimental results suggest that
                 our proposal is efficient and represents an improvement
                 over existing methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
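%%%
%%% The Bloom-filter trick above -- skip the overflow-page read when
%%% the filter answers `no' -- can be sketched briefly.  The filter
%%% and node layout below are illustrative, not the paper's; tuning
%%% m and k sets the false-positive rate that balances extra reads
%%% against write savings:
%%%
%%%     import hashlib
%%%
%%%     class BloomFilter:
%%%         def __init__(self, m_bits=1024, k_hashes=3):
%%%             self.m, self.k = m_bits, k_hashes
%%%             self.bits = bytearray((m_bits + 7) // 8)
%%%
%%%         def _probes(self, key):
%%%             for i in range(self.k):
%%%                 h = hashlib.blake2b(
%%%                     f"{i}:{key}".encode()).digest()
%%%                 yield int.from_bytes(h[:8], "big") % self.m
%%%
%%%         def add(self, key):
%%%             for p in self._probes(key):
%%%                 self.bits[p // 8] |= 1 << (p % 8)
%%%
%%%         def may_contain(self, key):
%%%             return all(self.bits[p // 8] >> (p % 8) & 1
%%%                        for p in self._probes(key))
%%%
%%%     class Node:
%%%         def __init__(self):
%%%             self.entries, self.overflow = {}, {}
%%%             self.bloom = BloomFilter()
%%%
%%%         def insert_overflow(self, key, value):
%%%             self.overflow[key] = value   # buffered random write
%%%             self.bloom.add(key)
%%%
%%%         def lookup(self, key):
%%%             if key in self.entries:
%%%                 return self.entries[key]
%%%             if self.bloom.may_contain(key):
%%%                 # extra read; false positives make it wasted
%%%                 return self.overflow.get(key)
%%%             return None   # overflow read avoided entirely
%%%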

@Article{Sitaridi:2016:GAS,
  author =       "Evangelia A. Sitaridi and Kenneth A. Ross",
  title =        "{GPU}-accelerated string matching for database
                 applications",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "5",
  pages =        "719--740",
  month =        oct,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-015-0409-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Sep 12 18:50:32 MDT 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Implementations of relational operators on GPU
                 processors have resulted in order-of-magnitude speedups
                 compared to their multicore CPU counterparts. Here we
                 focus on the efficient implementation of string
                 matching operators common in SQL queries. Due to
                 different architectural features the optimal algorithm
                 for CPUs might be suboptimal for GPUs. GPUs achieve
                 high memory bandwidth by running thousands of threads,
                 so it is not feasible to keep the working set of all
                 threads in the cache in a naive implementation. In GPUs
                 the unit of execution is a group of threads and in the
                 presence of loops and branches, threads in a group have
                 to follow the same execution path; if some threads
                 diverge, then different paths are serialized. We study
                 the cache memory efficiency of single- and
                 multi-pattern string matching algorithms for
                 conventional and pivoted string layouts in the GPU
                 memory. We evaluate the memory efficiency in terms of
                 memory access pattern and achieved memory bandwidth for
                 different parallelization methods. To reduce thread
                 divergence, we split string matching into multiple
                 steps. We evaluate the different matching algorithms in
                 terms of average- and worst-case performance and
                 compare them against state-of-the-art CPU and GPU
                 libraries. Our experimental evaluation shows that
                 thread and memory efficiency affect performance
                 significantly and that our proposed methods outperform
                 previous CPU and GPU algorithms in terms of raw
                 performance and power efficiency. The
                 Knuth--Morris--Pratt algorithm is a good choice for
                 GPUs because its regular memory access pattern makes it
                 amenable to several GPU optimizations.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
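%%%
%%% The abstract singles out Knuth--Morris--Pratt for its regular,
%%% never-backtracking access pattern.  For reference, a plain
%%% sequential KMP (the GPU parallelization is the paper's
%%% contribution; pattern assumed non-empty):
%%%
%%%     def kmp_search(text, pattern):
%%%         # failure[i]: length of the longest proper prefix of
%%%         # pattern that is also a suffix of pattern[:i+1]
%%%         failure = [0] * len(pattern)
%%%         k = 0
%%%         for i in range(1, len(pattern)):
%%%             while k and pattern[i] != pattern[k]:
%%%                 k = failure[k - 1]
%%%             if pattern[i] == pattern[k]:
%%%                 k += 1
%%%             failure[i] = k
%%%         hits, k = [], 0
%%%         for i, ch in enumerate(text):
%%%             while k and ch != pattern[k]:
%%%                 k = failure[k - 1]
%%%             if ch == pattern[k]:
%%%                 k += 1
%%%             if k == len(pattern):     # match ends at position i
%%%                 hits.append(i - k + 1)
%%%                 k = failure[k - 1]
%%%         return hits
%%%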

@Article{Mottin:2016:EQN,
  author =       "Davide Mottin and Matteo Lissandrini and Yannis
                 Velegrakis and Themis Palpanas",
  title =        "Exemplar queries: a new way of searching",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "6",
  pages =        "741--765",
  month =        dec,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0429-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Nov 10 18:03:04 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Modern search engines employ advanced techniques that
                 go beyond the structures that strictly satisfy the
                 query conditions in an effort to better capture the
                 user intentions. In this work, we introduce a novel
                 query paradigm that considers a user query as an
                 example of the data in which the user is interested. We
                 call these queries exemplar queries. We provide a
                 formal specification of their semantics and show that
                 they are fundamentally different from notions like
                 queries by example, approximate queries and related
                 queries. We provide an implementation of these
                 semantics for knowledge graphs and present an exact
                 solution with a number of optimizations that improve
                 performance without compromising the result quality. We
                 study two different congruence relations, isomorphism
                 and strong simulation, for identifying the answers to
                 an exemplar query. We also provide an approximate
                 solution that prunes the search space and achieves
                 considerably better time performance with minimal or no
                 impact on effectiveness. The effectiveness and
                 efficiency of these solutions with synthetic and real
                 datasets are experimentally evaluated, and the
                 importance of exemplar queries in practice is
                 illustrated.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
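%%%
%%% At definition level, an exemplar query returns the structures
%%% congruent to the user's example.  A deliberately naive sketch for
%%% the edge-isomorphism case over a tiny triple store (exponential;
%%% the paper's pruning and strong-simulation semantics are the real
%%% content):
%%%
%%%     from itertools import permutations
%%%
%%%     def exemplar_matches(graph, example):
%%%         # graph, example: lists of (subject, predicate, object)
%%%         ex_nodes = sorted({n for s, _, o in example
%%%                            for n in (s, o)})
%%%         g_nodes = sorted({n for s, _, o in graph
%%%                           for n in (s, o)})
%%%         edges = set(graph)
%%%         hits = []
%%%         for image in permutations(g_nodes, len(ex_nodes)):
%%%             m = dict(zip(ex_nodes, image))
%%%             if all((m[s], p, m[o]) in edges
%%%                    for s, p, o in example):
%%%                 hits.append(m)
%%%         return hits
%%%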

@Article{Li:2016:EDL,
  author =       "Yuhong Li and Leong Hou U. and Man Lung Yiu and Zhiguo
                 Gong",
  title =        "Efficient discovery of longest-lasting correlation in
                 sequence databases",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "6",
  pages =        "767--790",
  month =        dec,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0432-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Nov 10 18:03:04 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The search for similar subsequences is a core module
                 for various analytical tasks in sequence databases.
                 Typically, the similarity computations require users to
                 set a length. However, there is no robust means by
                 which to define the proper length for different
                 application needs. In this study, we examine a new
                 query that is capable of returning the longest-lasting
                 highly correlated subsequences in a sequence database,
                 which is particularly helpful to analyses without prior
                 knowledge regarding the query length. A baseline, yet
                 expensive, solution is to calculate the correlations
                 for every possible subsequence length. To boost
                 performance, we study a space-constrained index that
                 provides a tight correlation bound for subsequences of
                 similar lengths and offsets by intraobject and
                 interobject grouping techniques. To the best of our
                 knowledge, this is the first index to support a
                 normalized distance metric of arbitrary length
                 subsequences. In addition, we study the use of a smart
                 cache for disk-resident data (e.g., millions of
                 sequence objects) and a graphics processing unit-based
                 parallel processing technique for frequently updated
                 data (e.g., nonindexable streaming sequences) to
                 compute the longest-lasting highly correlated
                 subsequences. Extensive experimental evaluation on both
                 real and synthetic sequence datasets verifies the
                 efficiency and effectiveness of our proposed methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
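%%%
%%% The "baseline, yet expensive" solution mentioned above is short
%%% to state: try every length and offset.  A sketch for two aligned
%%% sequences under Pearson correlation (cubic; the paper's index and
%%% GPU techniques exist to avoid exactly this):
%%%
%%%     import numpy as np
%%%
%%%     def longest_correlated_window(x, y, threshold=0.9):
%%%         x = np.asarray(x, float)
%%%         y = np.asarray(y, float)
%%%         n = min(len(x), len(y))
%%%         for length in range(n, 1, -1):     # longest first
%%%             for s in range(n - length + 1):
%%%                 a, b = x[s:s + length], y[s:s + length]
%%%                 if a.std() > 0 and b.std() > 0 and \
%%%                    np.corrcoef(a, b)[0, 1] >= threshold:
%%%                     return s, length       # offset, length
%%%         return None
%%%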

@Article{Fakas:2016:DPS,
  author =       "Georgios J. Fakas and Zhi Cai and Nikos Mamoulis",
  title =        "Diverse and proportional size-$l$ object summaries
                 using pairwise relevance",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "6",
  pages =        "791--816",
  month =        dec,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0433-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Nov 10 18:03:04 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The abundance and ubiquity of graphs (e.g., online
                 social networks such as Google+ and Facebook;
                 bibliographic graphs such as DBLP) necessitates the
                 effective and efficient search over them. Given a set
                 of keywords that can identify a data subject (DS), a
                 recently proposed keyword search paradigm produces a
                 set of object summaries (OSs) as results. An OS is a
                 tree structure rooted at the DS node (i.e., a node
                 containing the keywords) with surrounding nodes that
                 summarize all data held on the graph about the DS. OS
                 snippets, denoted as size-l OSs, have also been
                 investigated. A size-l OS is a partial OS containing l
                 nodes such that the summation of their importance
                 scores results in the maximum possible total score.
                 However, the set of nodes that maximize the total
                 importance score may result in an uninformative size-l
                 OSs, as very important nodes may be repeated in it,
                 dominating other representative information. In view of
                 this limitation, in this paper, we investigate the
                 effective and efficient generation of two novel types
                 of OS snippets, i.e., diverse and proportional size-l
                 OSs, denoted as DSize-l and PSize-l OSs. Namely,
                 besides the importance of each node, we also consider
                 its pairwise relevance (similarity) to the other nodes
                 in the OS and the snippet. We conduct an extensive
                 evaluation on two real graphs (DBLP and Google+). We
                 verify effectiveness by collecting user feedback, e.g.,
                 by asking DBLP authors (i.e., the DSs themselves) to
                 evaluate our results. In addition, we verify the
                 efficiency of our algorithms and evaluate the quality
                 of the snippets that they produce.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
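%%%
%%% A greedy stand-in for the DSize-l idea above: repeatedly pick the
%%% node with the best trade-off between its own importance and its
%%% similarity to nodes already chosen (an MMR-style rule, not the
%%% paper's exact diverse/proportional objectives or algorithms):
%%%
%%%     def diverse_size_l(nodes, importance, sim, l, lam=0.5):
%%%         # importance: node -> score; sim: (node, node) -> [0, 1]
%%%         chosen, rest = [], list(nodes)
%%%         while rest and len(chosen) < l:
%%%             def gain(n):
%%%                 red = max((sim(n, c) for c in chosen),
%%%                           default=0.0)
%%%                 return lam * importance[n] - (1 - lam) * red
%%%             best = max(rest, key=gain)
%%%             chosen.append(best)
%%%             rest.remove(best)
%%%         return chosen
%%%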

@Article{BOgh:2016:SPW,
  author =       "Kenneth S. B{\O}gh and Sean Chester and Ira Assent",
  title =        "{SkyAlign}: a portable, work-efficient skyline
                 algorithm for multicore and {GPU} architectures",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "6",
  pages =        "817--841",
  month =        dec,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0438-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Nov 10 18:03:04 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The skyline operator determines points in a
                 multidimensional dataset that offer some optimal
                 trade-off. State-of-the-art CPU skyline algorithms
                 exploit quad-tree partitioning with complex branching
                 to minimise the number of point-to-point comparisons.
                 Branch-phobic GPU skyline algorithms rely on compute
                 throughput rather than partitioning, but fail to match
                 the performance of sequential algorithms. In this
                 paper, we introduce a new skyline algorithm, SkyAlign,
                 that is designed for the GPU, and a GPU-friendly,
                 grid-based tree structure upon which the algorithm
                 relies. The search tree allows us to dramatically
                 reduce the amount of work done by the GPU algorithm by
                 avoiding most point-to-point comparisons at the cost of
                 some compute throughput. This trade-off allows SkyAlign
                 to achieve orders of magnitude faster performance than
                 its predecessors. Moreover, a NUMA-oblivious port of
                 SkyAlign outperforms native multicore state of the art
                 on challenging workloads by an increasing margin as
                 more cores and sockets are utilised.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
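%%%
%%% For reference, the skyline operator itself is just a dominance
%%% filter (here minimizing every dimension).  The paper's point is
%%% computing this work-efficiently on multicore and GPU hardware,
%%% not the definition below:
%%%
%%%     def skyline(points):
%%%         def dominates(p, q):
%%%             return (all(a <= b for a, b in zip(p, q))
%%%                     and any(a < b for a, b in zip(p, q)))
%%%         return [p for p in points
%%%                 if not any(dominates(q, p)
%%%                            for q in points if q != p)]
%%%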

@Article{Zoumpatianos:2016:AAD,
  author =       "Kostas Zoumpatianos and Stratos Idreos and Themis
                 Palpanas",
  title =        "{ADS}: the adaptive data series index",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "6",
  pages =        "843--866",
  month =        dec,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0442-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Nov 10 18:03:04 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Numerous applications continuously produce big amounts
                 of data series, and in several time critical scenarios
                 analysts need to be able to query these data as soon as
                 they become available. This, however, is not currently
                 possible with the state-of-the-art indexing methods and
                 for very large data series collections. In this paper,
                 we present the first adaptive indexing mechanism,
                 specifically tailored to solve the problem of indexing
                 and querying very large data series collections. We
                 present a detailed design and evaluation of our method
                 using approximate and exact query algorithms with both
                 synthetic and real data sets. Adaptive indexing
                 significantly outperforms previous solutions,
                 gracefully handling large data series collections,
                 reducing the data to query delay: By the time
                 state-of-the-art indexing techniques finish indexing 1
                 billion data series (and before answering even a single
                 query), our method has already answered $3 \times
                 10^5$ queries.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
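%%%
%%% The data-to-query-delay idea above can be illustrated with
%%% classic database cracking on a plain column (ADS applies the same
%%% philosophy to data series summaries; this sketch is not ADS):
%%% each range query partitions the column around its bounds, so
%%% indexing effort is spread over queries instead of a long upfront
%%% build.
%%%
%%%     class CrackerIndex:
%%%         def __init__(self, values):
%%%             self.vals = list(values)   # starts unsorted
%%%             self.bounds = {}           # pivot -> split position
%%%
%%%         def _crack(self, pivot):
%%%             # stable partition around pivot, done at most once;
%%%             # previously recorded splits remain valid because a
%%%             # stable partition keeps every "< p" prefix a prefix
%%%             if pivot not in self.bounds:
%%%                 left = [v for v in self.vals if v < pivot]
%%%                 right = [v for v in self.vals if v >= pivot]
%%%                 self.vals = left + right
%%%                 self.bounds[pivot] = len(left)
%%%             return self.bounds[pivot]
%%%
%%%         def range_query(self, lo, hi):
%%%             # values in [lo, hi) sit between the two splits
%%%             i, j = self._crack(lo), self._crack(hi)
%%%             return self.vals[i:j]
%%%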

@Article{Liu:2016:AWW,
  author =       "Qing Liu and Yunjun Gao and Gang Chen and Baihua Zheng
                 and Linlin Zhou",
  title =        "Answering why-not and why questions on reverse top-$k$
                 queries",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "6",
  pages =        "867--892",
  month =        dec,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0443-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Nov 10 18:03:04 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Why-not and why questions can be posed by database
                 users to seek clarifications on unexpected query
                 results. Specifically, why-not questions aim to explain
                 why certain expected tuples are absent from the query
                 results, while why questions try to clarify why certain
                 unexpected tuples are present in the query results.
                 This paper systematically explores the why-not and why
                 questions on reverse top-$k$ queries, owing to its
                 importance in multi-criteria decision making. We first
                 formalize why-not questions on reverse top-$k$ queries,
                 which try to include the missing objects in the reverse
                 top-$k$ query results, and then, we propose a unified
                 framework called WQRTQ to answer why-not questions on
                 reverse top-$k$ queries. Our framework offers three
                 solutions to cater for different application scenarios.
                 Furthermore, we study why questions on reverse top-$k$
                 queries, which aim to exclude the undesirable objects
                 from the reverse top-$k$ query results, and extend the
                 framework WQRTQ to efficiently answer why questions on
                 reverse top-$k$ queries, which demonstrates the
                 flexibility of our proposed algorithms. Extensive
                 experimental evaluation with both real and synthetic
                 data sets verifies the effectiveness and efficiency of
                 the presented algorithms under various experimental
                 settings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
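%%%
%%% One simple why-not explanation over a reverse top-$k$ result can
%%% be computed directly: for a user missing from the answer, find
%%% the smallest k' that would have included them.  This is only one
%%% kind of refinement (alongside modifying the query point or the
%%% weights) that a framework like WQRTQ reasons about, and the code
%%% is illustrative rather than the paper's:
%%%
%%%     def why_not_k(facilities, weights, q_idx, missing_user):
%%%         # linear score, lower taken as better (classic
%%%         # reverse top-k formulation)
%%%         w = weights[missing_user]
%%%         score = lambda f: sum(wi * fi for wi, fi
%%%                               in zip(w, facilities[f]))
%%%         order = sorted(range(len(facilities)), key=score)
%%%         return order.index(q_idx) + 1   # smallest admitting k'
%%%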

@Article{Chen:2016:SSL,
  author =       "Yang Chen and Daisy Zhe Wang and Sean Goldberg",
  title =        "{ScaLeKB}: scalable learning and inference over large
                 knowledge bases",
  journal =      j-VLDB-J,
  volume =       "25",
  number =       "6",
  pages =        "893--918",
  month =        dec,
  year =         "2016",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0444-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Nov 10 18:03:04 MST 2016",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Recent years have seen a drastic rise in the
                 construction of web knowledge bases (e.g., Freebase,
                 YAGO, DBPedia). These knowledge bases store structured
                 information about real-world people, places,
                 organizations, etc. However, due to the limitations of
                 human knowledge, web corpora, and information
                 extraction algorithms, the knowledge bases are still
                 far from complete. To infer the missing knowledge, we
                 propose the Ontological Pathfinding (OP) algorithm to
                 mine first-order inference rules from these web
                 knowledge bases. The OP algorithm scales up via a
                 series of optimization techniques, including a new
                 parallel-rule-mining algorithm, a pruning strategy to
                 eliminate unsound and inefficient rules before applying
                 them, and a novel partitioning algorithm to break the
                 learning task into smaller independent sub-tasks.
                 Combining these techniques, we develop a first rule
                 mining system that scales to Freebase, the largest
                 public knowledge base with 112 million entities and 388
                 million facts. We mine 36,625 inference rules in 34 h;
                 no existing system achieves this scale. Based on the
                 mining algorithm and the optimizations, we develop an
                 efficient inference engine. As a result, we infer 0.9
                 billion new facts from Freebase in 17.19 h. We use
                 cross validation to evaluate the inferred facts and
                 estimate a degree of expansion by 0.6 over Freebase,
                 with a precision approaching 1.0. Our approach
                 outperforms state-of-the-art mining algorithms and
                 inference engines in terms of both performance and
                 quality.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
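%%%
%%% The rule mining the OP algorithm scales up can be shown in
%%% miniature: for candidate chain rules  head(x,z) <- p(x,y), q(y,z)
%%% over a triple store, count groundings (support) and how often the
%%% head actually holds (confidence).  A toy, single-machine version
%%% (the paper's pruning and partitioning are the hard part):
%%%
%%%     from collections import defaultdict
%%%     from itertools import product
%%%
%%%     def score_chain_rules(facts):
%%%         # facts: iterable of (subject, predicate, object)
%%%         fact_set = set(facts)
%%%         by_pred = defaultdict(list)
%%%         for s, p, o in fact_set:
%%%             by_pred[p].append((s, o))
%%%         scores = {}
%%%         for p, q, head in product(sorted(by_pred), repeat=3):
%%%             q_by_y = defaultdict(list)
%%%             for y, z in by_pred[q]:
%%%                 q_by_y[y].append(z)
%%%             supp = hits = 0
%%%             for x, y in by_pred[p]:          # join on y
%%%                 for z in q_by_y.get(y, ()):
%%%                     supp += 1
%%%                     hits += (x, head, z) in fact_set
%%%             if supp:
%%%                 scores[(head, p, q)] = (supp, hits / supp)
%%%         return scores
%%%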

@Article{Li:2017:SIB,
  author =       "Chen Li and Volker Markl",
  title =        "Special issue on best papers of {VLDB 2015}",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0450-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Mar 12 10:52:26 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See erratum \cite{Li:2017:ESI}.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2017:ESI,
  author =       "Chen Li and Volker Markl",
  title =        "Erratum to: {Special issue on best papers of VLDB
                 2015}",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "1",
  pages =        "3--3",
  month =        feb,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0458-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Mar 12 10:52:26 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Li:2017:SIB}.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gatterbauer:2017:DPA,
  author =       "Wolfgang Gatterbauer and Dan Suciu",
  title =        "Dissociation and propagation for approximate lifted
                 inference with standard relational database management
                 systems",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "1",
  pages =        "5--30",
  month =        feb,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0434-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Mar 12 10:52:26 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Probabilistic inference over large data sets is a
                 challenging data management problem since exact
                 inference is generally \#P-hard and is most often
                 solved approximately with sampling-based methods today.
                 This paper proposes an alternative approach for
                 approximate evaluation of conjunctive queries with
                 standard relational databases: In our approach, every
                 query is evaluated entirely in the database engine by
                 evaluating a fixed number of query plans, each
                 providing an upper bound on the true probability, then
                 taking their minimum. We provide an algorithm that
                 takes into account important schema information to
                 enumerate only the minimal necessary plans among all
                 possible plans. Importantly, this algorithm is a strict
                 generalization of all known PTIME self-join-free
                 conjunctive queries: A query is in PTIME if and only if
                 our algorithm returns one single plan. Furthermore, our
                 approach is a generalization of a family of efficient
                 ranking methods from graphs to hypergraphs. We also
                 adapt three relational query optimization techniques to
                 evaluate all necessary plans very fast. We give a
                 detailed experimental evaluation of our approach and,
                 in the process, provide a new way of thinking about the
                 value of probabilistic methods over non-probabilistic
                 methods for ranking query answers. We also note that
                 the techniques developed in this paper apply
                 immediately to lifted inference from statistical
                 relational models since lifted inference corresponds to
                 PTIME plans in probabilistic databases.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
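%%%
%%% A two-plan numeric miniature of the scheme above (toy numbers and
%%% query, not from the paper): for the Boolean query
%%% q :- R(x), S(x,y) with P(R(a)) = 0.5, P(S(a,b1)) = 0.8, and
%%% P(S(a,b2)) = 0.3, a plan that dissociates the shared tuple R(a)
%%% -- treating its two occurrences as independent -- can only
%%% overestimate, and the minimum over plans tightens the answer:
%%%
%%%     from math import prod
%%%
%%%     def ior(ps):
%%%         # probability that at least one independent event holds
%%%         return 1.0 - prod(1.0 - p for p in ps)
%%%
%%%     plan1 = ior([0.5 * 0.8, 0.5 * 0.3])  # dissociated: 0.49
%%%     plan2 = 0.5 * ior([0.8, 0.3])        # safe plan:   0.43
%%%     answer = min(plan1, plan2)           # 0.43, here exact
%%%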

@Article{Li:2017:RBR,
  author =       "Jiexing Li and Jeffrey F. Naughton and Rimma V.
                 Nehme",
  title =        "Resource bricolage and resource selection for parallel
                 database systems",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "1",
  pages =        "31--54",
  month =        feb,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0435-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Mar 12 10:52:26 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Running parallel database systems in an environment
                 with heterogeneous resources has become increasingly
                 common, due to cluster evolution and increasing
                 interest in moving applications into public clouds.
                 Performance differences among machines in the same
                 cluster pose new challenges for parallel database
                 systems. First, for database systems running in a
                 heterogeneous cluster, the default uniform data
                 partitioning strategy may overload some of the slow
                 machines, while at the same time it may underutilize
                 the more powerful machines. Since the processing time
                 of a parallel query is determined by the slowest
                 machine, such an allocation strategy may result in a
                 significant query performance degradation. Second,
                 since machines might have varying resources or
                 performance, different choices of machines may lead to
                 different costs or performance for executing the same
                 workload. By carefully selecting the most suitable
                 machines for running a workload, we may achieve better
                 performance with the same budget, or we may meet the
                 same performance requirements with a lower cost. We
                 address these challenges by introducing techniques we
                 call resource bricolage and resource selection that
                 improve database performance in heterogeneous
                 environments. Our approaches quantify the performance
                 differences among machines with various resources as
                 they process workloads with diverse resource
                 requirements. For the purpose of better resource
                 utilization, we formalize the problem of minimizing
                 workload execution time and view it as an optimization
                 problem, and then, we employ linear programming to
                 obtain a recommended data partitioning scheme. For the
                 purpose of better resource selection, we formalize two
                 problems: One minimizes the total workload execution
                 time with a given budget, and the other minimizes the
                 total budget with a given performance target. We then
                 employ different mixed-integer programs to search for
                 the optimal resource selection decisions. We verify the
                 effectiveness of both resource bricolage and resource
                 selection techniques with an extensive experimental
                 study.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
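%%%
%%% The one-resource special case of the partitioning problem above
%%% has a closed form that makes the LP's goal concrete: if machine i
%%% processes data at rate s_i, the makespan max_i w_i / s_i is
%%% minimized by splitting work in proportion to speed, so all
%%% machines finish together.  (The paper's LP handles many resource
%%% dimensions; this sketch is only the intuition.)
%%%
%%%     def partition_by_speed(work, speeds):
%%%         total = sum(speeds)
%%%         shares = [work * s / total for s in speeds]
%%%         makespan = max(w / s for w, s in zip(shares, speeds))
%%%         return shares, makespan   # every w/s equals work/total
%%%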

@Article{Finis:2017:OIS,
  author =       "Jan Finis and Robert Brunel and Alfons Kemper and
                 Thomas Neumann and Norman May and Franz Faerber",
  title =        "{Order Indexes}: supporting highly dynamic
                 hierarchical data in relational main-memory database
                 systems",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "1",
  pages =        "55--80",
  month =        feb,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0436-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Mar 12 10:52:26 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Maintaining and querying hierarchical data in a
                 relational database system is an important task in many
                 business applications. This task is especially
                 challenging when considering dynamic use cases with a
                 high rate of complex, possibly skewed structural
                 updates. Labeling schemes are widely considered the
                 indexing technique of choice for hierarchical data, and
                 many different schemes have been proposed. However,
                 they cannot handle dynamic use cases well due to
                 various problems, which we investigate in this paper.
                 We therefore propose Order Indexes--a dynamic
                 representation of the nested intervals encoding--which
                 offer competitive query performance, unprecedented
                 update efficiency, and robustness for highly dynamic
                 workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
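%%%
%%% The nested intervals encoding that Order Indexes make dynamic can
%%% be shown in its static form: assign each node an interval that
%%% encloses exactly its descendants' intervals, so ancestor checks
%%% are two comparisons.  The pain point motivating the paper is that
%%% a naive labeling like this one must relabel large parts of the
%%% tree under skewed structural updates:
%%%
%%%     class NestedIntervals:
%%%         def __init__(self, children, root):
%%%             self.iv, self.c = {}, 0
%%%             self._assign(children, root)
%%%
%%%         def _assign(self, children, node):
%%%             low = self.c
%%%             self.c += 1
%%%             for ch in children.get(node, []):
%%%                 self._assign(children, ch)
%%%             self.c += 1
%%%             self.iv[node] = (low, self.c)
%%%
%%%         def is_descendant(self, a, b):
%%%             (al, ah), (bl, bh) = self.iv[a], self.iv[b]
%%%             return bl < al and ah < bh
%%%
%%%     h = NestedIntervals({"r": ["a", "b"], "a": ["c"]}, "r")
%%%     assert h.is_descendant("c", "r")
%%%     assert not h.is_descendant("b", "a")
%%%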

@Article{Sa:2017:IKB,
  author =       "Christopher Sa and Alex Ratner and Christopher R{\'e}
                 and Jaeho Shin and Feiran Wang and Sen Wu and Ce
                 Zhang",
  title =        "Incremental knowledge base construction using
                 {DeepDive}",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "1",
  pages =        "81--105",
  month =        feb,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0437-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Mar 12 10:52:26 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Populating a database with information from
                 unstructured sources--also known as knowledge base
                 construction (KBC)--is a long-standing problem in
                 industry and research that encompasses problems of
                 extraction, cleaning, and integration. In this work, we
                 describe DeepDive, a system that combines database and
                 machine learning ideas to help develop KBC systems, and
                 we present techniques to make the KBC process more
                 efficient. We observe that the KBC process is
                 iterative, and we develop techniques to incrementally
                 produce inference results for KBC systems. We propose
                 two methods for incremental inference, based,
                 respectively, on sampling and variational techniques.
                 We also study the trade-off space of these methods and
                 develop a simple rule-based optimizer. DeepDive
                 includes all of these contributions, and we evaluate
                 DeepDive on five KBC systems, showing that it can speed
                 up KBC inference tasks by up to two orders of magnitude
                 with negligible impact on quality.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Trummer:2017:MOP,
  author =       "Immanuel Trummer and Christoph Koch",
  title =        "Multi-objective parametric query optimization",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "1",
  pages =        "107--124",
  month =        feb,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0439-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Mar 12 10:52:26 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Classical query optimization compares query plans
                 according to one cost metric and associates each plan
                 with a constant cost value. In this paper, we introduce
                 the multi-objective parametric query optimization
                 (MPQO) problem where query plans are compared according
                 to multiple cost metrics and the cost of a given plan
                 according to a given metric is modeled as a function
                 that depends on multiple parameters. The cost metrics
                 may, for instance, include execution time or monetary
                 fees; a parameter may represent the selectivity of a
                 query predicate that is unspecified at optimization
                 time. MPQO generalizes parametric query optimization
                 (which allows multiple parameters but only one cost
                 metric) and multi-objective query optimization (which
                 allows multiple cost metrics but no parameters). We
                 formally analyze the novel MPQO problem and show why
                 existing algorithms are inapplicable. We present a
                 generic algorithm for MPQO and a specialized version
                 for MPQO with piecewise-linear plan cost functions. We
                 prove that both algorithms find all relevant query
                 plans and experimentally evaluate the performance of
                 our second algorithm in multiple scenarios.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
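%%%
%%% The relevance notion at the heart of MPQO can be approximated by
%%% brute force to make the definitions concrete: a plan matters if,
%%% at some parameter setting, no other plan is at least as cheap on
%%% every metric and cheaper on one.  The paper's algorithms find the
%%% relevant set exactly, without a parameter grid:
%%%
%%%     def relevant_plans(plans, param_grid):
%%%         # plans: name -> (params -> cost vector)
%%%         def dominated(c, d):
%%%             return (all(y <= x for x, y in zip(c, d))
%%%                     and any(y < x for x, y in zip(c, d)))
%%%         relevant = set()
%%%         for params in param_grid:
%%%             costs = {n: f(params) for n, f in plans.items()}
%%%             for n, c in costs.items():
%%%                 if not any(dominated(c, d)
%%%                            for m, d in costs.items() if m != n):
%%%                     relevant.add(n)
%%%         return relevant
%%%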

@Article{Khayyat:2017:FSI,
  author =       "Zuhair Khayyat and William Lucia and Meghna Singh and
                 Mourad Ouzzani and Paolo Papotti and Jorge-Arnulfo
                 Quian{\'e}-Ruiz and Nan Tang and Panos Kalnis",
  title =        "Fast and scalable inequality joins",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "1",
  pages =        "125--150",
  month =        feb,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0441-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sun Mar 12 10:52:26 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Inequality joins, which is to join relations with
                 inequality conditions, are used in various
                 applications. Optimizing joins has been the subject of
                 intensive research ranging from efficient join
                 algorithms such as sort-merge join, to the use of
                 efficient indices such as the B$^+$-tree, R$^*$-tree, and
                 Bitmap. However, inequality joins have received little
                 attention and queries containing such joins are notably
                 very slow. In this paper, we introduce fast inequality
                 join algorithms based on sorted arrays and
                 space-efficient bit-arrays. We further introduce a
                 simple method to estimate the selectivity of inequality
                 joins which is then used to optimize multiple predicate
                 queries and multi-way joins. Moreover, we study an
                 incremental inequality join algorithm to handle
                 scenarios where data keeps changing. We have
                 implemented a centralized version of these algorithms
                 on top of PostgreSQL, a distributed version on top of
                 Spark SQL, and an existing data cleaning system,
                 Nadeef. By comparing our algorithms against well-known
                 optimization techniques for inequality joins, we show
                 our solution is more scalable and several orders of
                 magnitude faster.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
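
%%% The sorted-array idea in Khayyat:2017:FSI above can be
%%% illustrated with a small Python sketch of a join on a single
%%% inequality predicate (a deliberate simplification for
%%% exposition, not the paper's IEJoin algorithm): sort S once on
%%% its join key, then for every tuple of R binary-search the first
%%% qualifying position and emit the already-sorted suffix.
%%%
%%%     import bisect
%%%
%%%     def inequality_join(R, S, key_r, key_s):
%%%         """All pairs (r, s) with key_r(r) < key_s(s)."""
%%%         S_sorted = sorted(S, key=key_s)
%%%         keys = [key_s(s) for s in S_sorted]
%%%         out = []
%%%         for r in R:
%%%             # first index whose key strictly exceeds key_r(r)
%%%             i = bisect.bisect_right(keys, key_r(r))
%%%             out.extend((r, s) for s in S_sorted[i:])
%%%         return out
%%%
%%% The output itself can still be quadratic in size; the paper's
%%% contribution lies in handling several inequality predicates at
%%% once, selectivity estimation, and incremental maintenance.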

@Article{Yang:2017:RKN,
  author =       "Shiyu Yang and Muhammad Aamir Cheema and Xuemin Lin
                 and Ying Zhang and Wenjie Zhang",
  title =        "Reverse $k$ nearest neighbors queries and spatial
                 reverse top-$k$ queries",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "2",
  pages =        "151--176",
  month =        apr,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0445-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Mar 27 20:55:44 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a set of facilities and a set of users, a
                 reverse k nearest neighbors (RkNN) query q returns
                 every user for which the query facility is one of the k
                 closest facilities. Almost all of the existing
                 techniques to answer RkNN queries adopt a
                 pruning-and-verification framework. Regions-based
                 pruning and half-space pruning are the two most notable
                 pruning strategies. The half-space-based approach
                 prunes a larger area and is generally believed to be
                 superior. Influenced by this perception, almost all
                 existing RkNN algorithms utilize and improve the
                 half-space pruning strategy. We observe the weaknesses
                 and strengths of both strategies and discover that the
                 regions-based pruning has certain strengths that have
                 not been exploited in the past. Motivated by this, we
                 present a new regions-based pruning algorithm called
                 Slice that utilizes the strength of regions-based
                 pruning and overcomes its limitations. We also study
                 spatial reverse top-$k$ (SRTk) queries that return
                 every user u for which the query facility is one of the
                 top-$k$ facilities according to a given linear scoring
                 function. We first extend half-space-based pruning to
                 answer SRTk queries. Then, we propose a novel
                  regions-based pruning algorithm following the
                  Slice framework to solve the problem. Our
                  extensive
                 experimental study on synthetic and real data sets
                 demonstrates that Slice is significantly more efficient
                 than all existing RkNN and SRTk algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
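
%%% The half-space condition exploited by the pruning strategies in
%%% Yang:2017:RKN above reduces, for a single user point, to a
%%% bisector test: a user u is an RkNN of q exactly when fewer than
%%% k facilities are strictly closer to u than q is (ignoring
%%% ties). A hedged Python sketch of this per-point check; the
%%% paper's algorithms prune whole index regions, not single users.
%%%
%%%     import math
%%%
%%%     def is_rknn(u, q, facilities, k):
%%%         # dist(u, f) < dist(u, q) puts u on f's side of the
%%%         # perpendicular bisector of (q, f); u is an RkNN of q
%%%         # iff fewer than k facilities pass that test.
%%%         closer = sum(1 for f in facilities
%%%                      if f != q
%%%                      and math.dist(u, f) < math.dist(u, q))
%%%         return closer < k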

@Article{Li:2017:DBQ,
  author =       "Kun Li and Xiaofeng Zhou and Daisy Zhe Wang and
                 Christan Grant and Alin Dobra and Christopher Dudley",
  title =        "In-database batch and query-time inference over
                 probabilistic graphical models using {UDA} --- {GIST}",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "2",
  pages =        "177--201",
  month =        apr,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0446-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Mar 27 20:55:44 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "To meet customers' pressing demands, enterprise
                 database vendors have been pushing advanced analytical
                 techniques into databases. Most major DBMSes use
                 user-defined aggregates (UDAs), a data-driven operator,
                 to implement analytical techniques in parallel.
                 However, UDAs alone are not sufficient to implement
                 statistical algorithms where most of the work is
                 performed by iterative transitions over a large state
                 that cannot be naively partitioned due to data
                 dependency. Typically, this type of statistical
                 algorithm requires pre-processing to set up the large
                 state in the first place and demands post-processing
                 after the statistical inference. This paper presents
                 general iterative state transition (GIST), a new
                 database operator for parallel iterative state
                 transitions over large states. GIST receives a state
                 constructed by a UDA and then performs rounds of
                 transitions on the state until it converges. A final
                 UDA performs post-processing and result extraction. We
                 argue that the combination of UDA and GIST (UDA---GIST)
                 unifies data-parallel and state-parallel processing in
                 a single system, thus significantly extending the
                 analytical capabilities of DBMSes. We exemplify the
                  framework through two high-profile batch
                  applications, cross-document coreference and image
                  denoising, and one query-time inference
                  application, marginal inference queries over
                  probabilistic knowledge graphs. The three
                  applications use probabilistic graphical models, which
                 encode complex relationships of different variables and
                 are powerful for a wide range of problems. We show that
                 the in-database framework allows us to tackle a 27
                 times larger problem than a scalable distributed
                 solution for the first application and achieves 43
                 times speedup over the state-of-the-art for the second
                 application. For the third application, we implement
                 query-time inference using the UDA---GIST framework and
                 apply over a probabilistic knowledge graph, achieving
                 10 times speedup over sequential inference. To the best
                 of our knowledge, this is the first in-database
                  query-time inference engine over a large probabilistic
                 knowledge base. We show that the UDA---GIST framework
                 for data- and graph-parallel computations can support
                 both batch and query-time inference efficiently in
                 databases.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Xie:2017:PTP,
  author =       "Miao Xie and Sourav S. Bhowmick and Gao Cong and Qing
                 Wang",
  title =        "{PANDA}: toward partial topology-based search on large
                 networks in a single machine",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "2",
  pages =        "203--228",
  month =        apr,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0447-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Mar 27 20:55:44 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A large body of research has focused on efficient and
                 scalable processing of subgraph search queries on large
                 networks. In these efforts, a query is posed in the
                 form of a connected query graph. Unfortunately, in
                 practice end users may not always have precise
                 knowledge about the topological relationships between
                 nodes in a query graph to formulate a connected query.
                 In this paper, we present a novel graph querying
                 paradigm called partial topology-based network search
                 and propose a query processing framework called panda
                 to efficiently process partial topology query (ptq) in
                 a single machine. A ptq is a disconnected query graph
                 containing multiple connected query components. ptqs
                 allow an end user to formulate queries without
                 demanding precise information about the complete
                 topology of a query graph. To this end, we propose an
                 exact and an approximate algorithm called sen-panda and
                 po-panda, respectively, to generate top-$k$ matches of
                 a ptq. We also present a subgraph simulation-based
                  optimization technique to further speed up the
                 processing of ptqs. Using real-life networks with
                 millions of nodes, we experimentally verify that our
                 proposed algorithms are superior to several baseline
                 techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2017:SPM,
  author =       "Mohan Yang and Alexander Shkapsky and Carlo Zaniolo",
  title =        "Scaling up the performance of more powerful {Datalog}
                 systems on multicore machines",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "2",
  pages =        "229--248",
  month =        apr,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0448-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Mar 27 20:55:44 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Extending RDBMS technology to achieve performance and
                 scalability for queries that are much more powerful
                 than those of SQL-2 has been the goal of deductive
                 database research for more than thirty years. The
                  DeALS system has made major progress toward this
                  goal, by (1)
                 Datalog extensions that support the more powerful
                 recursive queries needed in advanced applications, and
                 (2) superior performance for both traditional recursive
                 queries and those made possible by the new extensions,
                 while (3) delivering competitive performance with
                 commercial RDBMSs on non-recursive queries. In this
                 paper, we focus on the techniques used to support the
                 in-memory evaluation of Datalog programs on multicore
                  machines. In DeALS, a Datalog program is represented
                 as an AND/OR tree, and multiple copies of the same
                 AND/OR tree are used to access the tables in the
                 database concurrently during the parallel evaluation.
                 We describe compilation techniques that (1) recognize
                 when the given program is lock-free, (2) transform a
                 locking program into a lock-free program, and (3) find
                 an efficient parallel plan that correctly evaluates the
                 program while minimizing the use of locks and other
                 overhead required for parallel evaluation. Extensive
                 experiments demonstrate the effectiveness of the
                 proposed techniques.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
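
%%% The recursive queries at the heart of Yang:2017:SPM above are
%%% classically evaluated by semi-naive fixpoint iteration; systems
%%% such as DeALS parallelize this loop. A minimal sequential
%%% Python sketch for the textbook transitive-closure program
%%% (illustrative only, not the DeALS implementation):
%%%
%%%     def transitive_closure(edges):
%%%         # edges: collection of (u, v) pairs.
%%%         # tc(X,Y) :- edge(X,Y).
%%%         # tc(X,Y) :- tc(X,Z), edge(Z,Y).
%%%         # Only facts derived in the previous round (delta)
%%%         # are joined with edge, so no derivation repeats.
%%%         tc = set(edges)
%%%         delta = set(edges)
%%%         while delta:
%%%             new = {(x, z) for (x, y) in delta
%%%                           for (y2, z) in edges if y == y2} - tc
%%%             tc |= new
%%%             delta = new
%%%         return tc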

@Article{Yu:2017:UFS,
  author =       "Minghe Yu and Jin Wang and Guoliang Li and Yong Zhang
                 and Dong Deng and Jianhua Feng",
  title =        "A unified framework for string similarity search with
                 edit-distance constraint",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "2",
  pages =        "249--274",
  month =        apr,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0449-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Mar 27 20:55:44 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "String similarity search is a fundamental operation in
                 data cleaning and integration. It has two variants:
                  threshold-based string similarity search and
                  top-$k$ string similarity search. Existing
                  algorithms are efficient for either the former or
                  the latter; most cannot support both variants. To
                  address this limitation, we propose a unified
                  framework. We first recursively partition strings
                  into disjoint segments and build a hierarchical
                  segment tree index (HS-Tree) on top of the
                  segments. Then, we utilize the HS-Tree to support
                  similarity
                 search. For threshold-based search, we identify
                 appropriate tree nodes based on the threshold to answer
                 the query and devise an efficient algorithm
                  (HS-Search). For top-$k$ search, we identify
                  promising strings that are likely to be similar
                 to the query, utilize these strings to estimate an
                 upper bound which is used to prune dissimilar strings
                 and propose an algorithm (HS-Topk). We develop
                 effective pruning techniques to further improve the
                 performance. To support large data sets, we extend our
                 techniques to support the disk-based setting.
                 Experimental results on real-world data sets show that
                 our method achieves high performance on the two
                 problems and outperforms state-of-the-art algorithms by
                  5--10 times.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
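
%%% The segment-based indexing in Yu:2017:UFS above rests on a
%%% pigeonhole argument: split a string into tau+1 disjoint
%%% segments, and tau edit operations can destroy at most tau of
%%% them, so any string within edit distance tau must contain one
%%% segment verbatim. A Python sketch of that filter (assuming
%%% len(s) >= tau + 1 so every segment is nonempty; the HS-Tree
%%% organizes such segments hierarchically to serve all thresholds):
%%%
%%%     def segment_filter(s, candidates, tau):
%%%         k = tau + 1
%%%         n = len(s)
%%%         bounds = [i * n // k for i in range(k + 1)]
%%%         segs = [s[bounds[i]:bounds[i + 1]] for i in range(k)]
%%%         # survivors still need exact edit-distance verification
%%%         return [c for c in candidates
%%%                 if any(seg in c for seg in segs)]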

@Article{Yuan:2017:EEG,
  author =       "Long Yuan and Lu Qin and Xuemin Lin and Lijun Chang
                 and Wenjie Zhang",
  title =        "{I/O} efficient {ECC} graph decomposition via graph
                 reduction",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "2",
  pages =        "275--300",
  month =        apr,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0451-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Mar 27 20:55:44 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The problem of computing $k$-edge connected components
                 ($k$-\mathsf {ECC}ECCs) of a graph G for a specific $k$
                 is a fundamental graph problem and has been
                 investigated recently. In this paper, we study the
                 problem of \mathsf {ECC}ECC decomposition, which
                 computes the $k$-\mathsf {ECC}ECCs of a graph G for all
                 possible k values. \mathsf {ECC}ECC decomposition can
                 be widely applied in a variety of applications such as
                 graph-topology analysis, community detection, Steiner
                 Component Search, and graph visualization. A
                 straightforward solution for \mathsf {ECC}ECC
                 decomposition is to apply the existing $k$-\mathsf
                 {ECC}ECC computation algorithm to compute the
                 $k$-\mathsf {ECC}ECCs for all $k$ values. However, this
                 solution is not applicable to large graphs for two
                 challenging reasons. First, all existing $k$-\mathsf
                 {ECC}ECC computation algorithms are highly memory
                 intensive due to the complex data structures used in
                 the algorithms. Second, the number of possible $k$
                 values can be very large, resulting in a high
                 computational cost when each $k$ value is independently
                 considered. In this paper, we address the above
                 challenges, and study I/O efficient \mathsf {ECC}ECC
                 decomposition via graph reduction. We introduce two
                 elegant graph reduction operators which aim to reduce
                 the size of the graph loaded in memory while preserving
                 the connectivity information of a certain set of edges
                 to be computed for a specific k. We also propose three
                 novel I/O efficient algorithms, \mathsf{Bottom}-\mathsf
                 {Up}, \mathsf {Top}-\mathsf {Down}, and \mathsf
                 {Hybrid}, that explore the $k$ values in different
                 orders to reduce the redundant computations between
                 different $k$ values. We analyze the I/O and memory
                 costs for all proposed algorithms. In addition, we
                 extend our algorithm to build an efficient index for
                 Steiner Component Search. We show that our index can be
                 used to perform Steiner Component Search in optimal
                 I/Os when only the node information of the graph is
                 allowed to be loaded in memory. In our experiments, we
                 evaluate our algorithms using seven real large datasets
                 with various graph properties, one of which contains
                 1.95 billion edges. The experimental results show that
                 our proposed algorithms are scalable and efficient.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2017:TKS,
  author =       "Xiang Wang and Wenjie Zhang and Ying Zhang and Xuemin
                 Lin and Zengfeng Huang",
  title =        "Top-$k$ spatial-keyword publish\slash subscribe over
                 sliding window",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "3",
  pages =        "301--326",
  month =        jun,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0453-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 24 11:54:27 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the prevalence of social media and GPS-enabled
                 devices, a massive amount of geo-textual data have been
                 generated in a stream fashion, leading to a variety of
                 applications such as location-based recommendation and
                 information dissemination. In this paper, we
                  investigate a novel real-time top-$k$ monitoring problem
                 over sliding window of streaming data; that is, we
                 continuously maintain the top-$k$ most relevant
                 geo-textual messages (e.g., geo-tagged tweets) for a
                 large number of spatial-keyword subscriptions (e.g.,
                 registered users interested in local events)
                 simultaneously. To provide the most recent information
                 under controllable memory cost, sliding window model is
                 employed on the streaming geo-textual data. To the best
                 of our knowledge, this is the first work to study
                  top-$k$ spatial-keyword publish/subscribe over
                  sliding window. A novel centralized system, called
                  Skype (Top-$k$ Spatial-keyword Publish/Subscribe),
                  is proposed in this paper. In Skype, to
                  continuously maintain top-$k$ results for massive
                  subscriptions, we devise a novel indexing
                  structure upon subscriptions such that each
                  incoming message can be immediately delivered on
                  its arrival. To reduce the expensive top-$k$
                  re-evaluation cost triggered by message
                  expiration, we develop a novel cost-based
                  $k$-skyband technique to reduce
                 the number of re-evaluations in a cost-effective way.
                 Extensive experiments verify the great efficiency and
                 effectiveness of our proposed techniques. Furthermore,
                 to support better scalability and higher throughput, we
                 propose a distributed version of Skype, namely DSkype,
                 on top of Storm, which is a popular distributed stream
                 processing system. With the help of fine-tuned
                 subscription/message distribution mechanisms, DSkype
                  can achieve orders of magnitude speed-up over its
                 centralized version.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gao:2017:PBH,
  author =       "Jun Gao and Yuqiong Liu and Chang Zhou and Jeffrey Xu
                 Yu",
  title =        "Path-based holistic detection plan for multiple
                 patterns in distributed graph frameworks",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "3",
  pages =        "327--345",
  month =        jun,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-016-0452-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 24 11:54:27 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Multiple pattern detection is needed in applications
                 like disease analysis over gene networks, bug detection
                 in program flow networks. This paper takes pattern
                 detection to investigate the evaluation and
                 optimization of multiple jobs in existing distributed
                 graph processing frameworks. The evaluation plan for
                 multiple pattern detection should be parallelizable and
                 can capture and reuse the shared parts among pattern
                 queries easily. In this paper, we design a path-based
                 holistic plan for multiple pattern queries.
                 Specifically, (1) we design a path-based edge-covered
                 plan for an individual pattern. The paths in the plan
                 can be easily captured and reused among different
                 queries. Additionally, the evaluation plan is fully
                 parallelizable, in which each data vertex performs
                  necessary join operations independently while
                  exploring the graph. (2) We extend the individual plan to a
                 holistic evaluation plan for multiple queries, whose
                 results are equivalent to those of individual queries.
                 The plan reduces the overall cost by finding frequent
                 paths among queries and reusing the shared part in the
                 holistic plan. (3) We devise various optimization
                 strategies over the holistic plan. The experimental
                 studies, conducted on Giraph, illustrate the high
                 effectiveness of our holistic approaches.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yi:2017:AVQ,
  author =       "Peipei Yi and Byron Choi and Sourav S. Bhowmick and
                 Jianliang Xu",
  title =        "{AutoG}: a visual query autocompletion framework for
                 graph databases",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "3",
  pages =        "347--372",
  month =        jun,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0454-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 24 11:54:27 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Composing queries is evidently a tedious task. This is
                 particularly true of graph queries as they are
                 typically complex and prone to errors, compounded by
                 the fact that graph schemas can be missing or too loose
                 to be helpful for query formulation. Despite the great
                 success of query formulation aids, in particular,
                 automatic query completion, graph query autocompletion
                 has received much less research attention. In this
                 paper, we propose a novel framework for subgraph query
                 autocompletion (called AutoG). Given an initial query q
                 and a user's preference as input, AutoG returns ranked
                 query suggestions Q'Q'z as output. Users may choose a
                 query from Q'Q'z and iteratively apply AutoG to compose
                 their queries. The novelties of AutoG are as follows:
                 First, we formalize query composition. Second, we
                 propose to increment a query with the logical units
                 called c-prime features that are (i) frequent subgraphs
                 and (ii) constructed from smaller c-prime features in
                 no more than c ways. Third, we propose algorithms to
                 rank candidate suggestions. Fourth, we propose a novel
                 index called feature Dag (FDag) to optimize the
                 ranking. We study the query suggestion quality with
                 simulations and real users and conduct an extensive
                 performance evaluation. The results show that the query
                 suggestions are useful (saved roughly 40\% of users'
                  mouse clicks), and AutoG returns suggestions quickly
                 under a large variety of parameter settings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Aljubayrin:2017:FLC,
  author =       "Saad Aljubayrin and Jianzhong Qi and Christian S.
                 Jensen and Rui Zhang and Zhen He and Yuan Li",
  title =        "Finding lowest-cost paths in settings with safe and
                 preferred zones",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "3",
  pages =        "373--397",
  month =        jun,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0455-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 24 11:54:27 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We define and study Euclidean and spatial network
                 variants of a new path finding problem: given a set of
                 safe or preferred zones with zero or low cost, find
                 paths that minimize the cost of travel from an origin
                 to a destination. In this problem, the entire space is
                 passable, with preference given to safe or preferred
                 zones. Existing algorithms for problems that involve
                 unsafe regions to be avoided strictly are not effective
                 for this new problem. To solve the Euclidean variant,
                 we devise a transformation of the continuous data space
                 with safe zones into a discrete graph upon which
                 shortest path algorithms apply. A naive transformation
                 yields a large graph that is expensive to search. In
                 contrast, our transformation exploits properties of
                 hyperbolas in Euclidean space to safely eliminate graph
                 edges, thus improving performance without affecting
                 correctness. To solve the spatial network variant, we
                 propose a different graph-to-graph transformation that
                 identifies critical points that serve the same purpose
                 as do the hyperbolas, thus also avoiding the extraneous
                 edges. Having solved the problem for safe zones with
                 zero costs, we extend the transformations to the
                 weighted version of the problem, where travel in
                 preferred zones has nonzero costs. Experiments on both
                 real and synthetic data show that our approaches
                 outperform baseline approaches by more than an order of
                 magnitude in graph construction time, storage space,
                 and query response time.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2017:DSP,
  author =       "Dongxiang Zhang and Dingyu Yang and Yuan Wang and
                 Kian-Lee Tan and Jian Cao and Heng Tao Shen",
  title =        "Distributed shortest path query processing on dynamic
                 road networks",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "3",
  pages =        "399--419",
  month =        jun,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0457-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 24 11:54:27 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Shortest path query processing on dynamic road
                 networks is a fundamental component for real-time
                 navigation systems. In the face of an enormous volume
                 of customer demand from Uber and similar apps, it is
                 desirable to study distributed shortest path query
                 processing that can be deployed on elastic and
                 fault-tolerant cloud platforms. In this paper, we
                 combine the merits of distributed streaming computing
                 systems and lightweight indexing to build an efficient
                 shortest path query processing engine on top of Yahoo
                 S4. We propose two types of asynchronous communication
                 algorithms for early termination. One is
                 first-in-first-out message propagation with certain
                 optimizations, and the other is prioritized message
                 propagation with the help of navigational intelligence.
                 Extensive experiments were conducted on large-scale
                 real road networks, and the results show that the query
                 efficiency of our methods can meet the real-time
                 requirement and is superior to Pregel and Pregel+. The
                 source code of our system is publicly available at
                 https://github.com/yangdingyu/cands.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lai:2017:SSE,
  author =       "Longbin Lai and Lu Qin and Xuemin Lin and Lijun
                 Chang",
  title =        "Scalable subgraph enumeration in {MapReduce}: a
                 cost-oriented approach",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "3",
  pages =        "421--446",
  month =        jun,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0459-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 24 11:54:27 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Subgraph enumeration, which aims to find all the
                 subgraphs of a large data graph that are isomorphic to
                 a given pattern graph, is a fundamental graph problem
                 with a wide range of applications. However, existing
                 sequential algorithms for subgraph enumeration fall
                 short in handling large graphs due to the involvement
                 of computationally intensive subgraph isomorphism
                  operations. Thus, some recent research focuses on
                  solving the problem using MapReduce. Nevertheless,
                  existing MapReduce approaches are not scalable to
                  handle very large graphs since they either produce
                  a huge number of partial results or consume a
                  large amount of memory. Motivated by this, in this
                  paper, we propose a new algorithm, TwinTwigJoin,
                  based on a left-deep-join framework in MapReduce,
                  in which the basic join unit is a TwinTwig (an
                  edge or two incident edges of a node). We show
                  that in the Erd{\H{o}}s--R{\'e}nyi random graph
                  model, TwinTwigJoin is
                 instance optimal in the left-deep-join framework under
                 reasonable assumptions, and we devise an algorithm to
                 compute the optimal join plan. We further discuss how
                 our approach can be adapted to handle the power-law
                 random graph model. Three optimization strategies are
                 explored to improve our algorithm. Ultimately, by
                 aggregating equivalent nodes into a compressed node, we
                 construct the compressed graph, upon which the subgraph
                 enumeration is further improved. We conduct extensive
                 performance studies in several real graphs, one of
                 which contains billions of edges. Our approach
                 significantly outperforms existing solutions in all
                 tests.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cafagna:2017:DIP,
  author =       "Francesco Cafagna and Michael H. B{\"o}hlen",
  title =        "Disjoint interval partitioning",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "3",
  pages =        "447--466",
  month =        jun,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0456-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 24 11:54:27 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In databases with time interval attributes, query
                 processing techniques that are based on sort-merge or
                 sort-aggregate deteriorate. This happens because for
                 intervals no total order exists and either the start or
                 end point is used for the sorting. Doing so leads to
                 inefficient solutions with lots of unproductive
                 comparisons that do not produce an output tuple. Even
                 if just one tuple with a long interval is present in
                 the data, the number of unproductive comparisons of
                  sort-merge and sort-aggregate becomes quadratic.
                  In this paper we propose disjoint interval
                  partitioning (DIP), a technique to efficiently
                  perform sort-based operators on interval data.
                  DIP divides an input relation into the minimum
                  number of partitions, such that all tuples in a
                  partition are non-overlapping. The absence of
                  overlapping tuples guarantees efficient sort-merge
                  computations without backtracking. With DIP the
                  number of unproductive comparisons is linear in
                  the number of partitions. In contrast to current
                  solutions with inefficient random accesses to the
                  active tuples, DIP fetches the tuples in a
                  partition sequentially. We illustrate the
                  generality and efficiency of DIP by
                 describing and evaluating three basic database
                 operators over interval data: join, anti-join and
                 aggregation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
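
%%% The core partitioning step of Cafagna:2017:DIP above can be
%%% sketched with the classic greedy sweep: sort tuples by interval
%%% start and place each one into the partition that became free
%%% earliest, opening a new partition only when every existing one
%%% still overlaps. This yields as many partitions as the maximum
%%% overlap depth, which is the minimum possible. (A sketch of the
%%% partitioning only; the paper also shows how sort-based
%%% operators consume the partitions without backtracking.)
%%%
%%%     import heapq
%%%
%%%     def dip(relation):
%%%         """Partition (start, end) tuples so that tuples within
%%%         a partition never overlap (closed intervals)."""
%%%         parts, heap = [], []   # heap: (last_end, part_index)
%%%         for s, e in sorted(relation):
%%%             if heap and heap[0][0] < s:
%%%                 _, i = heapq.heappop(heap)
%%%                 parts[i].append((s, e))
%%%             else:
%%%                 i = len(parts)
%%%                 parts.append([(s, e)])
%%%             heapq.heappush(heap, (e, i))
%%%         return parts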

@Article{Gao:2017:EFR,
  author =       "Yunjun Gao and Xiaoye Miao and Gang Chen and Baihua
                 Zheng and Deng Cai and Huiyong Cui",
  title =        "On efficiently finding reverse $k$-nearest neighbors
                 over uncertain graphs",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "4",
  pages =        "467--492",
  month =        aug,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0460-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 27 16:38:23 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Reverse $k$-nearest neighbor (\hbox {R}k\hbox
                 {NN}RkNN) query on graphs returns the data objects that
                 take a specified query object q as one of their
                 $k$-nearest neighbors. It has significant influence in
                 many real-life applications including resource
                 allocation and profile-based marketing. However, to the
                 best of our knowledge, there is little previous work on
                  R$k$NN search over uncertain graph data, even
                  though many complex networks such as traffic
                  networks and protein--protein interaction networks
                  are often modeled as uncertain graphs. In this
                  paper, we systematically study the problem of
                  reverse $k$-nearest neighbor search on uncertain
                  graphs (UG-R$k$NN search for short), where graph
                  edges contain uncertainty. First, to address
                  UG-R$k$NN search, we propose three
                 effective heuristics, i.e., GSP, EGR, and PBP, which
                 minimize the original large uncertain graph as a much
                 smaller essential uncertain graph, cut down the number
                 of possible graphs via the newly introduced graph
                 conditional dominance relationship, and reduce the
                 validation cost of data nodes in order to improve query
                 efficiency. Then, we present an efficient algorithm,
                  termed SDP, to support UG-R$k$NN retrieval by
                  seamlessly integrating the three heuristics. In
                  view of the high complexity of UG-R$k$NN search,
                  we
                 further present a novel algorithm called TripS, with
                 the help of an adaptive stratified sampling technique.
                 Extensive experiments using both real and synthetic
                 graphs demonstrate the performance of our proposed
                 algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tao:2017:SSW,
  author =       "Yufei Tao and Xiaocheng Hu and Miao Qiao",
  title =        "Stream sampling over windows with worst-case
                 optimality and $ \ell $-overlap independence",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "4",
  pages =        "493--510",
  month =        aug,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0461-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 27 16:38:23 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Sampling provides fundamental support to numerous
                 applications that cannot afford to materialize all the
                 objects arriving at a rapid speed. Existing stream
                 sampling algorithms guarantee small space and query
                 overhead, but all require worst-case update time
                 proportional to the number of samples. This creates a
                 performance issue when a large sample set is required.
                 In this paper, we propose a new sampling algorithm that
                 is optimal simultaneously in all the three aspects:
                 space, query time, and update time. In particular, the
                 algorithm handles an update in $ O(1) $ worst-case time
                 with a very small hidden constant. Our algorithm also
                 ensures a strong independence guarantee: the sample
                 sets of all the queries are mutually independent as
                 long as the overlap between two query windows is
                 small.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
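
%%% For context on Tao:2017:SSW above: the textbook reservoir
%%% sampling algorithm already achieves O(1) worst-case time per
%%% arriving item for a whole, unbounded stream; the paper's
%%% difficulty is retaining such bounds under sliding windows,
%%% where expirations invalidate samples. The classic whole-stream
%%% baseline in Python:
%%%
%%%     import random
%%%
%%%     def reservoir(stream, k):
%%%         """Uniform k-sample of all items seen so far."""
%%%         sample = []
%%%         for n, item in enumerate(stream, start=1):
%%%             if n <= k:
%%%                 sample.append(item)
%%%             else:
%%%                 j = random.randrange(n)   # uniform in [0, n)
%%%                 if j < k:
%%%                     sample[j] = item      # evict a random slot
%%%         return sample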

@Article{Nguyen:2017:ADC,
  author =       "Quoc Viet Nguyen and Chi Thang Duong and Thanh Tam
                 Nguyen and Matthias Weidlich and Karl Aberer and
                 Hongzhi Yin and Xiaofang Zhou",
  title =        "Argument discovery via crowdsourcing",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "4",
  pages =        "511--535",
  month =        aug,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0462-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 27 16:38:23 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The amount of controversial issues being discussed on
                 the Web has been growing dramatically. In articles,
                 blogs, and wikis, people express their points of view
                 in the form of arguments, i.e., claims that are
                 supported by evidence. Discovery of arguments has a
                 large potential for informing decision-making. However,
                 argument discovery is hindered by the sheer amount of
                 available Web data and its unstructured, free-text
                 representation. The former calls for automatic
                 text-mining approaches, whereas the latter implies a
                 need for manual processing to extract the structure of
                 arguments. In this paper, we propose a
                 crowdsourcing-based approach to build a corpus of
                 arguments, an argumentation base, thereby mediating the
                 trade-off of automatic text-mining and manual
                 processing in argument discovery. We develop an
                 end-to-end process that minimizes the crowd cost while
                 maximizing the quality of crowd answers by: (1) ranking
                 argumentative texts, (2) pro-actively eliciting user
                 input to extract arguments from these texts, and (3)
                 aggregating heterogeneous crowd answers. Our
                 experiments with real-world datasets highlight that our
                 method discovers virtually all arguments in documents
                 when processing only 25\% of the text with more than
                 80\% precision, using only 50\% of the budget consumed
                 by a baseline algorithm.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2017:EMA,
  author =       "Tianzheng Wang and Ryan Johnson and Alan Fekete and
                 Ippokratis Pandis",
  title =        "Efficiently making (almost) any concurrency control
                 mechanism serializable",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "4",
  pages =        "537--562",
  month =        aug,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0463-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 27 16:38:23 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See erratum \cite{Wang:2018:EEM}.",
  abstract =     "Concurrency control (CC) algorithms must trade off
                 strictness for performance. In particular, serializable
                 CC schemes generally pay higher cost to prevent
                 anomalies, both in runtime overhead such as the
                 maintenance of lock tables and in efforts wasted by
                 aborting transactions. We propose the serial safety net
                 (SSN), a serializability-enforcing certifier which can
                 be applied on top of various CC schemes that offer
                 higher performance but admit anomalies, such as
                 snapshot isolation and read committed. The underlying
                 CC mechanism retains control of scheduling and
                 transactional accesses, while SSN tracks the resulting
                 dependencies. At commit time, SSN performs a validation
                 test by examining only direct dependencies of the
                 committing transaction to determine whether it can
                 commit safely or must abort to avoid a potential
                 dependency cycle. SSN performs robustly for a variety
                 of workloads. It maintains the characteristics of the
                 underlying CC without biasing toward a certain type of
                 transactions, though the underlying CC scheme might.
                 Besides traditional OLTP workloads, SSN also
                 efficiently handles heterogeneous workloads which
                 include a significant portion of long, read-mostly
                 transactions. SSN can avoid tracking the vast majority
                 of reads (thus reducing the overhead of serializability
                 certification) and still produce serializable
                 executions with little overhead. The dependency
                 tracking and validation tests can be done efficiently,
                 fully parallel and latch-free, for multi-version
                 systems on modern hardware with substantial core count
                 and large main memory. We demonstrate the efficiency,
                 accuracy and robustness of SSN using extensive
                 simulations and an implementation that overlays
                 snapshot isolation in ERMIA, a memory-optimized OLTP
                 engine that supports multiple CC schemes. Evaluation
                 results confirm that SSN is a promising approach to
                 serializability with robust performance and low
                 overhead for various workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhu:2017:EAT,
  author =       "Qiankun Zhu and Hong Cheng and Xin Huang",
  title =        "{I/O}-efficient algorithms for top-$k$ nearest keyword
                 search in massive graphs",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "4",
  pages =        "563--583",
  month =        aug,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0464-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 27 16:38:23 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Networks emerging nowadays usually have labels or
                  textual content on the nodes. We model such a
                  commonly seen network as an undirected graph $G$,
                  in which each
                 node is attached with zero or more keywords, and each
                 edge is assigned with a length. On such networks, a
                  novel and useful query is called top-$k$ nearest
                  keyword ($k$-NK) search. Given a query node $q$ in
                  $G$ and a keyword $\lambda$, a $k$-NK query
                  searches for the $k$ nodes that contain $\lambda$
                  and are nearest to $q$. The $k$-NK
                 problem has been studied recently in the literature.
                 But most existing solutions assume that the graph as
                 well as the constructed index can fit entirely in
                 memory. As a result, they cannot be applied directly to
                 very large-scale networks which are commonly found in
                 practice, but cannot fit in memory. In this work, we
                 design an I/O-efficient solution, which uses a compact
                  disk index to answer a $k$-NK query with constant
                  I/Os. The key to an accurate $k$-NK result is a
                  precise shortest distance estimation in a graph.
                  In our solution, we follow our previous work Qiao
                  et al. (PVLDB 6:901--912, 2013), which uses the
                  shortest path tree as an approximate
                  representation of a graph and uses the tree
                  distance between two nodes as an accurate
                  estimation of the shortest distance between them
                  on a graph. With such a representation, the
                  original $k$-NK query on a graph can be reduced to
                 answering the query on a set of trees and then
                 assembling the results obtained from the trees. We
                 exploit a compact tree-based index and study how to lay
                 out the index to disk. We design a novel technique
                 which decomposes the index tree into paths and subtrees
                 and stores them in disk. Our theoretical analysis shows
                 that the disk-based index is small in size and supports
                 constant query I/Os. Extensive experimental study on
                 massive trees and graphs with billions of edges and
                 keywords verifies our theoretical findings and
                 demonstrates the superiority of our method over the
                 state-of-the-art methods in the literature.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
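
%%% The query semantics in Zhu:2017:EAT above have a simple
%%% in-memory baseline: run Dijkstra from the query node and stop
%%% after the k-th node carrying the keyword. The paper replaces
%%% this traversal with a disk-resident tree index answered in
%%% constant I/Os; the sketch below is only the baseline, with
%%% hypothetical adj/labels dictionaries as input.
%%%
%%%     import heapq
%%%
%%%     def knk_baseline(adj, labels, q, lam, k):
%%%         """adj: {u: [(v, length), ...]};
%%%         labels: {u: set of keywords}."""
%%%         dist = {q: 0.0}
%%%         heap = [(0.0, q)]
%%%         result = []
%%%         while heap and len(result) < k:
%%%             d, u = heapq.heappop(heap)
%%%             if d > dist.get(u, float('inf')):
%%%                 continue              # stale heap entry
%%%             if lam in labels.get(u, ()):
%%%                 result.append((u, d))
%%%             for v, w in adj.get(u, ()):
%%%                 nd = d + w
%%%                 if nd < dist.get(v, float('inf')):
%%%                     dist[v] = nd
%%%                     heapq.heappush(heap, (nd, v))
%%%         return result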

@Article{Chen:2017:IMU,
  author =       "Lu Chen and Yunjun Gao and Aoxiao Zhong and Christian
                 S. Jensen and Gang Chen and Baihua Zheng",
  title =        "Indexing metric uncertain data for range queries and
                 range joins",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "4",
  pages =        "585--610",
  month =        aug,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0465-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jul 27 16:38:23 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Range queries and range joins in metric spaces have
                 applications in many areas, including GIS,
                 computational biology, and data integration, where
                 metric uncertain data exist in different forms,
                 resulting from circumstances such as equipment
                 limitations, high-throughput sequencing technologies,
                 and privacy preservation. We represent metric uncertain
                 data by using an object-level model and a bi-level
                 model, respectively. Two novel indexes, the uncertain
                  pivot $B^+$-tree (UPB-tree) and the uncertain
                  pivot $B^+$-forest (UPB-forest), are proposed in
                  order to
                 support probabilistic range queries and range joins for
                 a wide range of uncertain data types and similarity
                 metrics. Both index structures use a small set of
                 effective pivots chosen based on a newly defined
                  criterion and employ the $B^+$-tree(s) as the
                 underlying index. In addition, we present efficient
                 metric probabilistic range query and metric
                 probabilistic range join algorithms, which utilize
                 validation and pruning techniques based on derived
                 probability lower and upper bounds. Extensive
                 experiments with both real and synthetic data sets
                 demonstrate that, compared against existing
                 state-of-the-art indexes for metric uncertain data, the
                 UPB-tree and the UPB-forest incur much lower
                 construction costs, consume less storage space, and can
                 support more efficient metric probabilistic range
                 queries and metric probabilistic range joins.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
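
%%% The pivot-based filtering that underlies the UPB-tree in
%%% Chen:2017:IMU above comes from the triangle inequality:
%%% |dist(q,p) - dist(o,p)| <= dist(q,o) for any pivot p, so an
%%% object whose lower bound exceeds the query radius can be
%%% discarded without a distance computation. A deterministic
%%% (certain-data) Python sketch; the paper extends this with
%%% probability bounds for uncertain objects and a B^+-tree layout.
%%%
%%%     def pivot_range_query(objects, pivots, pdist, q, r, dist):
%%%         """pdist[j][i] = dist(objects[j], pivots[i]),
%%%         precomputed at index time."""
%%%         qd = [dist(q, p) for p in pivots]
%%%         out = []
%%%         for j, o in enumerate(objects):
%%%             lb = max(abs(qd[i] - pdist[j][i])
%%%                      for i in range(len(pivots)))
%%%             if lb <= r and dist(q, o) <= r:  # verify survivors
%%%                 out.append(o)
%%%         return out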

@Article{Song:2017:GRU,
  author =       "Shaoxu Song and Boge Liu and Hong Cheng and Jeffrey Xu
                 Yu and Lei Chen",
  title =        "Graph repairing under neighborhood constraints",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "5",
  pages =        "611--635",
  month =        oct,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0466-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 2 16:14:05 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "A broad class of data, ranging from similarity
                 networks, workflow networks to protein networks, can be
                 modeled as graphs with data values as vertex labels.
                 Both vertex labels and neighbors could be dirty for
                 various reasons such as typos or erroneous reporting of
                 results in scientific experiments. Neighborhood
                 constraints, specifying label pairs that are allowed to
                 appear on adjacent vertices in the graph, are employed
                 to detect and repair erroneous vertex labels and
                 neighbors. In this paper, we study the problem of
                 repairing vertex labels and neighbors to make graphs
                 satisfy neighborhood constraints. Unfortunately, the
                 problem is generally hard, which motivates us to devise
                  approximation methods for repairing and to identify
                 interesting special cases (star and clique constraints)
                 that can be efficiently solved. First, we propose
                 several label repairing approximation algorithms
                  including greedy heuristics, a contraction method, and
                  an approach combining both. The performance of these
                  algorithms is also analyzed for the special case. Moreover, we
                 devise a cubic-time constant-factor graph repairing
                 algorithm with both label and neighbor repairs (given
                 degree-bounded instance graphs). Our extensive
                 experimental evaluation on real data demonstrates the
                 effectiveness of eliminating frauds in several types of
                 application networks.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
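
%%% The entry above detects and repairs vertices whose labels violate
%%% neighborhood constraints (allowed label pairs on adjacent
%%% vertices).  A minimal Python sketch, not from the paper, of the
%%% detection step that precedes any repair; the adjacency-list graph,
%%% label map, and constraint set below are illustrative:
%%%
%%%     def violating_edges(adj, labels, allowed):
%%%         """Return edges whose endpoint labels are not an allowed
%%%         pair; repair algorithms then relabel or rewire them."""
%%%         bad = []
%%%         for v, nbrs in adj.items():
%%%             for u in nbrs:
%%%                 if v < u:  # visit each undirected edge once
%%%                     if frozenset((labels[v], labels[u])) not in allowed:
%%%                         bad.append((v, u))
%%%         return bad
%%%
%%%     # toy usage: the pair a--b is allowed, a--c is not
%%%     adj = {1: [2, 3], 2: [1], 3: [1]}
%%%     labels = {1: "a", 2: "b", 3: "c"}
%%%     print(violating_edges(adj, labels, {frozenset(("a", "b"))}))
%%%     # -> [(1, 3)]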

@Article{Zhou:2017:EOV,
  author =       "Xiangmin Zhou and Lei Chen and Yanchun Zhang and Dong
                 Qin and Longbing Cao and Guangyan Huang and Chen Wang",
  title =        "Enhancing online video recommendation using social
                 user interactions",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "5",
  pages =        "637--656",
  month =        oct,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0469-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 2 16:14:05 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The creation of media sharing communities has resulted
                 in the astonishing increase of digital videos, and
                  their wide applications in domains like online news
                 broadcasting, entertainment and advertisement. The
                 improvement of these applications relies on effective
                 solutions for social user access to videos. This fact
                 has driven the research interest in the recommendation
                 in shared communities. Though effort has been put into
                 social video recommendation, the contextual information
                 on social users has not been well exploited for
                 effective recommendation. Motivated by this, in this
                 paper, we propose a novel approach based on the video
                 content and user information for the recommendation in
                 shared communities. A new solution is developed by
                 allowing batch video recommendation to multiple new
                 users and optimizing the subcommunity extraction. We
                 first propose an effective technique that reduces the
                 subgraph partition cost based on graph decomposition
                 and reconstruction for efficient subcommunity
                 extraction. Then, we design a summarization-based
                 algorithm which groups the clicked videos of multiple
                  unregistered users and simultaneously provides
                  recommendations to each of them. Finally, we present a
                 nontrivial social updates maintenance approach for
                 social data based on user connection summarization. We
                 evaluate the performance of our solution over a large
                 dataset considering different strategies for group
                 video recommendation in sharing communities.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Attasena:2017:SSC,
  author =       "Varunya Attasena and J{\'e}r{\^o}me Darmont and Nouria
                 Harbi",
  title =        "Secret sharing for cloud data security: a survey",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "5",
  pages =        "657--681",
  month =        oct,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0470-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 2 16:14:05 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Cloud computing helps reduce costs, increase business
                 agility and deploy solutions with a high return on
                 investment for many types of applications. However,
                 data security is of premium importance to many users
                 and often restrains their adoption of cloud
                 technologies. Various approaches, i.e., data
                 encryption, anonymization, replication and
                 verification, help enforce different facets of data
                 security. Secret sharing is a particularly interesting
                 cryptographic technique. Its most advanced variants
                 indeed simultaneously enforce data privacy,
                 availability and integrity, while allowing computation
                 on encrypted data. The aim of this paper is thus to
                 wholly survey secret sharing schemes with respect to
                 data security, data access and costs in the
                 pay-as-you-go paradigm.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
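
%%% The survey above centers on secret sharing.  For orientation, a
%%% minimal Python sketch of Shamir's classic (k, n) threshold scheme
%%% over a prime field; the toy prime and parameters are illustrative,
%%% and a production scheme needs a cryptographic RNG:
%%%
%%%     import random
%%%
%%%     PRIME = 2**61 - 1  # toy field size; secret must be < PRIME
%%%
%%%     def share(secret, k, n):
%%%         """Random degree-(k-1) polynomial f with f(0) = secret;
%%%         share i is the point (i, f(i) mod PRIME)."""
%%%         coef = [secret] + [random.randrange(PRIME) for _ in range(k - 1)]
%%%         f = lambda x: sum(c * pow(x, j, PRIME)
%%%                           for j, c in enumerate(coef)) % PRIME
%%%         return [(i, f(i)) for i in range(1, n + 1)]
%%%
%%%     def reconstruct(shares):
%%%         """Lagrange interpolation at x = 0 over GF(PRIME)."""
%%%         s = 0
%%%         for xi, yi in shares:
%%%             num = den = 1
%%%             for xj, _ in shares:
%%%                 if xj != xi:
%%%                     num = num * (-xj) % PRIME
%%%                     den = den * (xi - xj) % PRIME
%%%             s = (s + yi * num * pow(den, PRIME - 2, PRIME)) % PRIME
%%%         return s
%%%
%%%     # any k = 3 of the n = 5 shares recover the secret
%%%     print(reconstruct(share(42, 3, 5)[:3]))  # -> 42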

@Article{Huang:2017:QAL,
  author =       "Qiang Huang and Jianlin Feng and Qiong Fang and
                 Wilfred Ng and Wei Wang",
  title =        "Query-aware locality-sensitive hashing scheme for $
                 l_p $ norm",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "5",
  pages =        "683--708",
  month =        oct,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0472-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 2 16:14:05 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The problem of c-Approximate Nearest Neighbor (c-ANN)
                 search in high-dimensional space is fundamentally
                 important in many applications, such as image database
                 and data mining. Locality-Sensitive Hashing (LSH) and
                 its variants are the well-known indexing schemes to
                 tackle the c-ANN search problem. Traditionally, LSH
                 functions are constructed in a query-oblivious manner,
                 in the sense that buckets are partitioned before any
                 query arrives. However, objects closer to a query may
                 be partitioned into different buckets, which is
                 undesirable. Due to the use of query-oblivious bucket
                 partition, the state-of-the-art LSH schemes for
                 external memory, namely C2LSH and LSB-Forest, only work
                  with approximation ratio of integer $ c \ge 2 $.
                 In this paper, we introduce a novel concept of
                 query-aware bucket partition which uses a given query
                 as the ``anchor'' for bucket partition. Accordingly, a
                 query-aware LSH function under a specific $ l_p $ norm
                  with $ p \in (0, 2] $ is a random projection
                 coupled with query-aware bucket partition, which
                 removes random shift required by traditional
                 query-oblivious LSH functions. The query-aware bucket
                 partitioning strategy can be easily implemented so that
                 query performance is guaranteed. For each $ l_p $ norm
                  $ (p \in (0, 2]) $, based on the
                 corresponding p-stable distribution, we propose a novel
                 LSH scheme named query-aware LSH (QALSH) for c-ANN
                 search over external memory. Our theoretical studies
                 show that QALSH enjoys a guarantee on query quality.
                 The use of query-aware LSH function enables QALSH to
                 work with any approximation ratio $ c > 1 $. In
                  addition, we propose a heuristic variant named QALSH$^+$
                  to improve the scalability of QALSH. Extensive
                  experiments show that QALSH and QALSH$^+$ outperform the
                 state-of-the-art schemes, especially in
                 high-dimensional space. Specifically, by using a ratio
                 $ c < 2 $, QALSH can achieve much better query
                 quality.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
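
%%% A minimal numpy sketch, not the paper's implementation, of the
%%% query-aware idea described above for the Euclidean case p = 2: the
%%% hash is a plain random projection with no random shift, and a
%%% bucket of half-width w/2 is anchored at the query's own
%%% projection; w and the sizes below are illustrative:
%%%
%%%     import numpy as np
%%%
%%%     def query_aware_bucket(data, query, w=4.0, seed=0):
%%%         """Project onto one random 2-stable (Gaussian) direction
%%%         and keep points within w/2 of the query's projection."""
%%%         rng = np.random.default_rng(seed)
%%%         a = rng.standard_normal(data.shape[1])
%%%         return np.where(np.abs(data @ a - query @ a) <= w / 2.0)[0]
%%%
%%%     # toy usage: candidate set for a zero query among random points
%%%     pts = np.random.default_rng(1).standard_normal((1000, 16))
%%%     print(len(query_aware_bucket(pts, np.zeros(16))))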

@Article{Zhu:2017:GSG,
  author =       "Qijun Zhu and Haibo Hu and Cheng Xu and Jianliang Xu
                 and Wang-Chien Lee",
  title =        "Geo-social group queries with minimum acquaintance
                 constraints",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "5",
  pages =        "709--727",
  month =        oct,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0473-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 2 16:14:05 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The prosperity of location-based social networking has
                 paved the way for new applications of group-based
                 activity planning and marketing. While such
                 applications heavily rely on geo-social group queries
                 (GSGQs), existing studies fail to produce a cohesive
                 group in terms of user acquaintance. In this paper, we
                 propose a new family of GSGQs with minimum acquaintance
                 constraints. They are more appealing to users as they
                 guarantee a worst-case acquaintance level in the result
                 group. For efficient processing of GSGQs on large
                 location-based social networks, we devise two
                 social-aware spatial index structures, namely SaR-tree
                 and SaR*-tree. The latter improves on the former by
                 considering both spatial and social distances when
                 clustering objects. Based on SaR-tree and SaR*-tree,
                 novel algorithms are developed to process various
                 GSGQs. Extensive experiments on real datasets Gowalla
                 and Twitter show that our proposed methods
                 substantially outperform the baseline algorithms under
                 various system settings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2017:DMK,
  author =       "Kai Zhang and Kaibo Wang and Yuan Yuan and Lei Guo and
                  Rubao Lee and Xiaodong Zhang and Bingsheng He and Jiayu
                 Hu and Bei Hua",
  title =        "A distributed in-memory key-value store system on
                 heterogeneous {CPU--GPU} cluster",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "5",
  pages =        "729--750",
  month =        oct,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0479-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Oct 2 16:14:05 MDT 2017",
  bibsource =    "http://portal.acm.org/;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In-memory key-value stores play a critical role in
                 many data-intensive applications to provide
                 high-throughput and low latency data accesses.
                 In-memory key-value stores have several unique
                 properties that include (1) data-intensive operations
                 demanding high memory bandwidth for fast data accesses,
                 (2) high data parallelism and simple computing
                 operations demanding many slim parallel computing
                 units, and (3) a large working set. However, our
                 experiments show that homogeneous multicore CPU systems
                 are increasingly mismatched to the special properties
                 of key-value stores because they do not provide massive
                 data parallelism and high memory bandwidth; the
                  powerful but limited number of computing cores does
                  not satisfy the demand of this unique data processing
                  task; and the cache hierarchy may not benefit the
                  large working set. In this paper, we present the
                 design and implementation of Mega-KV, a distributed
                 in-memory key-value store system on a heterogeneous
                  CPU--GPU cluster. Effectively utilizing the high
                 memory bandwidth and latency hiding capability of GPUs,
                 Mega-KV provides fast data accesses and significantly
                 boosts overall performance and energy efficiency over
                 the homogeneous CPU architectures. Mega-KV shows
                  excellent scalability and processes up to 623 million
                 key-value operations per second on a cluster installed
                 with eight CPUs and eight GPUs, while delivering an
                  efficiency of up to 299 thousand operations per Watt
                 (KOPS/W).",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2017:FIC,
  author =       "Rong-Hua Li and Lu Qin and Jeffrey Xu Yu and Rui Mao",
  title =        "Finding influential communities in massive networks",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "6",
  pages =        "751--776",
  month =        dec,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0467-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Nov 10 08:53:24 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Community search is a problem of finding densely
                 connected subgraphs that satisfy the query conditions
                 in a network, which has attracted much attention in
                 recent years. However, all the previous studies on
                 community search do not consider the influence of a
                 community. In this paper, we introduce a novel
                 community model called k-influential community based on
                 the concept of k-core to capture the influence of a
                 community. Based on this community model, we propose a
                 linear time online search algorithm to find the
                  top-$r$ $k$-influential communities in a network. To further
                 speed up the influential community search algorithm, we
                 devise a linear space data structure which supports
                  efficient search of the top-$r$ $k$-influential communities
                 in optimal time. We also propose an efficient algorithm
                 to maintain the data structure when the network is
                 frequently updated. Additionally, we propose a novel
                  I/O-efficient algorithm to find the top-$r$ $k$-influential
                 communities in a disk-resident graph under the
                  assumption of $ {\mathcal U} = O(n) $, where
                  $ {\mathcal U} $ and $n$ denote the size of the main
                 memory and the number of nodes, respectively. Finally,
                 we conduct extensive experiments on six real-world
                 massive networks, and the results demonstrate the
                 efficiency and effectiveness of the proposed methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
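
%%% The k-influential community model above builds on the k-core.  A
%%% minimal Python sketch of the standard peeling computation of core
%%% numbers (kept quadratic for clarity; the paper's algorithms are
%%% linear-time and influence-aware):
%%%
%%%     def core_numbers(adj):
%%%         """Repeatedly delete a vertex of minimum remaining degree;
%%%         the running maximum of that degree is the core number."""
%%%         deg = {v: len(ns) for v, ns in adj.items()}
%%%         alive, core, k = set(adj), {}, 0
%%%         while alive:
%%%             v = min(alive, key=deg.get)
%%%             k = max(k, deg[v])
%%%             core[v] = k
%%%             alive.remove(v)
%%%             for u in adj[v]:
%%%                 if u in alive:
%%%                     deg[u] -= 1
%%%         return core
%%%
%%%     # toy usage: a triangle (core 2) with a pendant vertex (core 1)
%%%     adj = {1: [2, 3], 2: [1, 3], 3: [1, 2, 4], 4: [3]}
%%%     print(core_numbers(adj))  # vertex 4 -> 1, vertices 1,2,3 -> 2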

@Article{Ali:2017:CDP,
  author =       "Syed Muhammad Ali and Robert Wrembel",
  title =        "From conceptual design to performance optimization of
                 {ETL} workflows: current state of research and open
                 problems",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "6",
  pages =        "777--801",
  month =        dec,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0477-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Nov 10 08:53:24 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we discuss the state of the art and
                 current trends in designing and optimizing ETL
                 workflows. We explain the existing techniques for: (1)
                 constructing a conceptual and a logical model of an ETL
                 workflow, (2) its corresponding physical
                 implementation, and (3) its optimization, illustrated
                 by examples. The discussed techniques are analyzed
                 w.r.t. their advantages, disadvantages, and challenges
                 in the context of metrics such as autonomous behavior,
                 support for quality metrics, and support for ETL
                 activities as user-defined functions. We draw
                 conclusions on still open research and technological
                 issues in the field of ETL. Finally, we propose a
                 theoretical ETL framework for ETL optimization.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fang:2017:EEA,
  author =       "Yixiang Fang and Reynold Cheng and Yankai Chen and
                 Siqiang Luo and Jiafeng Hu",
  title =        "Effective and efficient attributed community search",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "6",
  pages =        "803--828",
  month =        dec,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0482-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Nov 10 08:53:24 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a graph G and a vertex q \in Gq'zG, the
                 community search query returns a subgraph of G that
                 contains vertices related to q. Communities, which are
                 prevalent in attributed graphs such as social networks
                 and knowledge bases, can be used in emerging
                 applications such as product advertisement and setting
                 up of social events. In this paper, we investigate the
                 attributed community query (or ACQ), which returns an
                 attributed community (AC) for an attributed graph. The
                 AC is a subgraph of G, which satisfies both structure
                 cohesiveness (i.e., its vertices are tightly connected)
                 and keyword cohesiveness (i.e., its vertices share
                 common keywords). The AC enables a better understanding
                 of how and why a community is formed (e.g., members of
                 an AC have a common interest in music, because they all
                 have the same keyword ``music''). An AC can be
                 ``personalized''; for example, an ACQ user may specify
                 that an AC returned should be related to some specific
                 keywords like ``research'' and ``sports''. To enable
                 efficient AC search, we develop the CL-tree index
                 structure and three algorithms based on it. We further
                 propose efficient algorithms for maintaining the index
                 on dynamic graphs. Moreover, we study two problems that
                 are related to the ACQ problem. We evaluate our
                 solutions on six large graphs. Our results show that
                 ACQ is more effective and efficient than existing
                 community retrieval approaches. Moreover, an AC
                 contains more precise and personalized information than
                 that of existing community search and detection
                 methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lu:2017:MES,
  author =       "Wei Lu and Jiajia Hou and Ying Yan and Meihui Zhang
                 and Xiaoyong Du and Thomas Moscibroda",
  title =        "{MSQL}: efficient similarity search in metric spaces
                 using {SQL}",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "6",
  pages =        "829--854",
  month =        dec,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0481-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Nov 10 08:53:24 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Similarity search is a primitive operation that arises
                 in a large variety of database applications. Typical
                 examples include identifying articles with similar
                 titles, finding similar images and music in a large
                 digital object repository, etc. While there exist a
                 wide spectrum of access methods for similarity queries
                 in metric spaces, a practical solution that can be
                 fully supported by existing RDBMS with high efficiency
                 still remains an open problem. In this paper, we
                 present MSQL, a practical solution for answering
                 similarity queries in metric spaces fully using SQL. To
                 the best of our knowledge, MSQL enables users to find
                 similar objects by submitting SELECT-FROM-WHERE
                 statements only. MSQL provides a uniform indexing
                  scheme based on a standard built-in B$^+$-tree index,
                 with the ability to accelerate the query processing
                 using index seek. Various query optimization techniques
                 are incorporated in MSQL to significantly reduce CPU
                 and I/O cost. We deploy MSQL on top of PostgreSQL.
                 Extensive experiments on various real data sets
                 demonstrate MSQL's benefits, performing up to two
                 orders of magnitude faster than existing
                 domain-specific SQL-based solutions and being
                 comparable to native solutions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hung:2017:AVG,
  author =       "Nguyen Quoc Hung and Duong Chi Thang and Nguyen Thanh
                 Tam and Matthias Weidlich and Karl Aberer and Hongzhi
                 Yin and Xiaofang Zhou",
  title =        "Answer validation for generic crowdsourcing tasks with
                 minimal efforts",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "6",
  pages =        "855--880",
  month =        dec,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0484-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Nov 10 08:53:24 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Crowdsourcing has been established as an essential
                 means to scale human computation in diverse Web
                 applications, reaching from data integration to
                 information retrieval. Yet, crowd workers have
                 wide-ranging levels of expertise. Large worker
                 populations are heterogeneous and comprise a
                  significant number of faulty workers. As a consequence,
                  quality assurance for crowd answers is commonly seen as
                  the Achilles heel of crowdsourcing. Although various
                 techniques for quality control have been proposed in
                 recent years, a post-processing phase in which crowd
                 answers are validated is still required. Such
                 validation, however, is typically conducted by experts,
                 whose availability is limited and whose work incurs
                 comparatively high costs. This work aims at guiding an
                 expert in the validation of crowd answers. We present a
                 probabilistic model that helps to identify the most
                 beneficial validation questions in terms of both
                 improvement in result correctness and detection of
                 faulty workers. By seeking expert feedback on the most
                 problematic cases, we are able to obtain a set of
                 high-quality answers, even if the expert does not
                 validate the complete answer set. Our approach is
                 applicable for a broad range of crowdsourcing tasks,
                 including classification and counting. Our
                 comprehensive evaluation using both real-world and
                 synthetic datasets demonstrates that our techniques
                 save up to 60\% of expert efforts compared to baseline
                 methods when striving for perfect result correctness.
                 In absolute terms, for most cases, we achieve close to
                 perfect correctness after expert input has been sought
                 for only 15\% of the crowdsourcing tasks.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Herschel:2017:SPW,
  author =       "Melanie Herschel and Ralf Diestelk{\"a}mper and
                 Houssem Ben Lahmar",
  title =        "A survey on provenance: {What} for? {What} form?
                 {What} from?",
  journal =      j-VLDB-J,
  volume =       "26",
  number =       "6",
  pages =        "881--906",
  month =        dec,
  year =         "2017",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0486-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Nov 10 08:53:24 MST 2017",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Provenance refers to any information describing the
                 production process of an end product, which can be
                 anything from a piece of digital data to a physical
                 object. While this survey focuses on the former type of
                 end product, this definition still leaves room for many
                 different interpretations of and approaches to
                 provenance. These are typically motivated by different
                 application domains for provenance (e.g.,
                 accountability, reproducibility, process debugging) and
                 varying technical requirements such as runtime,
                 scalability, or privacy. As a result, we observe a wide
                 variety of provenance types and provenance-generating
                 methods. This survey provides an overview of the
                 research field of provenance, focusing on what
                 provenance is used for (what for?), what types of
                 provenance have been defined and captured for the
                 different applications (what form?), and which
                 resources and system requirements impact the choice of
                 deploying a particular provenance solution (what
                 from?). For each of these three key questions, we
                 provide a classification and review the state of the
                 art for each class. We conclude with a summary and
                 possible future research challenges.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wei:2018:RQI,
  author =       "Hao Wei and Jeffrey Xu Yu and Can Lu and Ruoming Jin",
  title =        "Reachability querying: an independent permutation
                 labeling approach",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "1",
  pages =        "1--26",
  month =        feb,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0468-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 6 18:41:42 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Reachability query is a fundamental graph operation
                 which answers whether a vertex can reach another vertex
                  over a large directed graph $G$ with $n$ vertices and $m$
                 edges and has been extensively studied. In the
                 literature, all the approaches compute a label for
                 every vertex in a graph $G$ by index construction
                 offline. The query time for answering reachability
                 queries online is affected by the quality of the labels
                 computed in index construction. The three main costs
                 are the index construction time, the index size, and
                 the query time. Some of the up-to-date approaches can
                 answer reachability queries efficiently, but spend
                 nonlinear time to construct an index. Some of the
                 up-to-date approaches construct an index in linear time
                 and space, but may need to depth-first search $G$ at
                 run-time in $ O(n + m)$. In this paper, we discuss a
                 new randomized labeling approach, named IP label, to
                 answer reachability queries with probability guarantee,
                 and the randomness is by independent permutation. Two
                 additional labels are also proposed to further enhance
                 the query processing. In addition, to deal with dynamic
                 graphs, we discuss the label maintenance over dynamic
                 graphs and give efficient algorithms for the labels
                 proposed. We conduct extensive experimental studies to
                 compare with the up-to-date approaches using 19 large
                 real datasets used in the existing work and synthetic
                 datasets. We confirm the efficiency and scalability of
                 our approach in static graphs testing, and our
                 maintenance algorithms are about one order of magnitude
                 faster than the existing ones in dynamic graphs
                 testing.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
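
%%% A minimal Python sketch of the independent-permutation idea behind
%%% the IP label above, assuming a DAG processed in reverse
%%% topological order (all successors before a vertex); names and the
%%% k hash copies are illustrative:
%%%
%%%     import random
%%%
%%%     def min_hash_labels(adj, rev_topo, k=4, seed=0):
%%%         """label_i(v) = min of the i-th random value over v's
%%%         reachable set (v included)."""
%%%         rng = random.Random(seed)
%%%         lab = {v: [rng.random() for _ in range(k)] for v in adj}
%%%         for v in rev_topo:  # successors already finalized
%%%             for u in adj[v]:
%%%                 for i in range(k):
%%%                     lab[v][i] = min(lab[v][i], lab[u][i])
%%%         return lab
%%%
%%%     def may_reach(lab, v, u):
%%%         """If v reaches u, every label of v is <= that of u, so one
%%%         violated inequality certifies non-reachability."""
%%%         return all(a <= b for a, b in zip(lab[v], lab[u]))
%%%
%%%     adj = {1: [2], 2: [3], 3: []}
%%%     lab = min_hash_labels(adj, [3, 2, 1])
%%%     # True; the second is False with high probability
%%%     print(may_reach(lab, 1, 3), may_reach(lab, 3, 1))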

@Article{Lin:2018:OAS,
  author =       "Chunbin Lin and Jiaheng Lu and Zhewei Wei and Jianguo
                 Wang and Xiaokui Xiao",
  title =        "Optimal algorithms for selecting top-$k$ combinations
                 of attributes: theory and applications",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "1",
  pages =        "27--52",
  month =        feb,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0485-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 6 18:41:42 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Traditional top-$k$ algorithms, e.g., TA and NRA, have
                 been successfully applied in many areas such as
                 information retrieval, data mining and databases. They
                 are designed to discover k objects, e.g., top-$k$
                 restaurants, with highest overall scores aggregated
                 from different attributes, e.g., price and location.
                 However, new emerging applications like query
                 recommendation require providing the best combinations
                 of attributes, instead of objects. The straightforward
                 extension based on the existing top-$k$ algorithms is
                 prohibitively expensive to answer top-$k$ combinations
                 because they need to enumerate all the possible
                 combinations, which is exponential to the number of
                 attributes. In this article, we formalize a novel type
                  of top-$k$ query, called top-$k,m$, which aims to find
                 top-$k$ combinations of attributes based on the overall
                  scores of the top-$m$ objects within each combination,
                  where $m$ is the number of objects forming a combination.
                  We propose a family of efficient top-$k,m$ algorithms
                 with different data access methods, i.e., sorted
                 accesses and random accesses and different query
                 certainties, i.e., exact query processing and
                 approximate query processing. Theoretically, we prove
                 that our algorithms are instance optimal and analyze
                 the bound of the depth of accesses. We further develop
                 optimizations for efficient query evaluation to reduce
                 the computational and the memory costs and the number
                 of accesses. We provide a case study on the real
                  applications of top-$k,m$ queries for an online
                 biomedical search engine. Finally, we perform
                 comprehensive experiments to demonstrate the
                  scalability and efficiency of top-$k,m$ algorithms on
                 multiple real-life datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
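
%%% The entry above generalizes classic top-k aggregation from objects
%%% to attribute combinations.  For reference, a minimal Python sketch
%%% of the textbook Threshold Algorithm (TA) it builds on, using
%%% sorted access plus random access and summation as the aggregate
%%% (data layout illustrative):
%%%
%%%     import heapq
%%%
%%%     def threshold_algorithm(lists, k):
%%%         """lists: per attribute, [(obj, score), ...] sorted by
%%%         score descending.  Stop once the k-th best aggregate
%%%         reaches the threshold of last-seen sorted-access scores."""
%%%         random_access = [dict(l) for l in lists]
%%%         agg = {}
%%%         for depth in range(max(len(l) for l in lists)):
%%%             threshold = 0.0
%%%             for l in lists:
%%%                 if depth < len(l):
%%%                     obj, s = l[depth]
%%%                     threshold += s
%%%                     if obj not in agg:  # random access on first sight
%%%                         agg[obj] = sum(ra.get(obj, 0.0)
%%%                                        for ra in random_access)
%%%             top = heapq.nlargest(k, agg.values())
%%%             if len(top) == k and top[-1] >= threshold:
%%%                 break  # no unseen object can still qualify
%%%         return heapq.nlargest(k, agg.items(), key=lambda kv: kv[1])
%%%
%%%     l1 = [("a", 0.9), ("b", 0.8), ("c", 0.1)]
%%%     l2 = [("b", 0.7), ("a", 0.6), ("c", 0.2)]
%%%     print(threshold_algorithm([l1, l2], 2))  # a and b tie at 1.5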

@Article{Zhao:2018:ESS,
  author =       "Xiang Zhao and Chuan Xiao and Xuemin Lin and Wenjie
                 Zhang and Yang Wang",
  title =        "Efficient structure similarity searches: a
                 partition-based approach",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "1",
  pages =        "53--78",
  month =        feb,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0487-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 6 18:41:42 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Graphs are widely used to model complex data in many
                 applications, such as bioinformatics, chemistry, social
                 networks, pattern recognition. A fundamental and
                 critical query primitive is to efficiently search
                 similar structures in a large collection of graphs.
                 This article mainly studies threshold-based graph
                 similarity search with edit distance constraints.
                 Existing solutions to the problem utilize fixed-size
                 overlapping substructures to generate candidates, and
                 thus become susceptible to large vertex degrees and
                 distance thresholds. In this article, we present a
                 partition-based approach to tackle the problem. By
                 dividing data graphs into variable-size non-overlapping
                 partitions, the edit distance constraint is converted
                 to a graph containment constraint for candidate
                 generation. We develop efficient query processing
                 algorithms based on the novel paradigm. Moreover,
                 candidate-pruning techniques and an improved graph edit
                 distance verification algorithm are developed to boost
                 the performance. In addition, a cost-aware graph
                 partitioning method is devised to optimize the index.
                 Extending the partition-based filtering paradigm, we
                  present a solution to the top-$k$ graph similarity
                 search problem, where tailored filtering, look-ahead
                 and computation-sharing strategies are exploited. Using
                 both public real-life and synthetic datasets, extensive
                 experiments demonstrate that our approaches
                 significantly outperform the baseline and its
                 alternatives.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yu:2018:DSS,
  author =       "Weiren Yu and Xuemin Lin and Wenjie Zhang and Julie A.
                  McCann",
  title =        "Dynamical {SimRank} search on time-varying networks",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "1",
  pages =        "79--104",
  month =        feb,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0488-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 6 18:41:42 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "SimRank is an appealing pair-wise similarity measure
                 based on graph structure. It iteratively follows the
                 intuition that two nodes are assessed as similar if
                 they are pointed to by similar nodes. Many real graphs
                 are large, and links are constantly subject to minor
                 changes. In this article, we study the efficient
                 dynamical computation of all-pairs SimRanks on
                 time-varying graphs. Existing methods for the dynamical
                 SimRank computation [e.g., LTSF (Shao et al. in PVLDB
                 8(8):838--849, 2015) and READS (Zhang et al. in PVLDB
                 10(5):601--612, 2017)] mainly focus on top-$k$ search
                 with respect to a given query. For all-pairs dynamical
                 SimRank search, Li et al.'s approach (Li et al. in
                 EDBT, 2010) was proposed for this problem. It first
                 factorizes the graph via a singular value decomposition
                 (SVD) and then incrementally maintains such a
                 factorization in response to link updates at the
                 expense of exactness. As a result, all pairs of
                 SimRanks are updated approximately, yielding $ O(r^4
                 n^2) $ time and $ O(r^2 n^2) $ memory in a graph with
                  $n$ nodes, where $r$ is the target rank of the low-rank
                 SVD. Our solution to the dynamical computation of
                 SimRank comprises of five ingredients: (1) We first
                 consider edge update that does not accompany new node
                 insertions. We show that the SimRank update
                  $ \Delta \mathbf{S} $ in response to
                 every link update is expressible as a rank-one
                 Sylvester matrix equation. This provides an incremental
                 method requiring $ O(K n^2)$ time and $ O(n^2)$ memory
                  in the worst case to update $ n^2 $ pairs of similarities
                 for $K$ iterations. (2) To speed up the computation
                 further, we propose a lossless pruning strategy that
                  captures the ``affected areas'' of $ \Delta \mathbf{S} $
                  to eliminate unnecessary retrieval.
                 This reduces the time of the incremental SimRank to $
                 O(K(m + |{\textsf {AFF}}|))$, where $m$ is the number
                 of edges in the old graph, and $ |{\textsf {AFF}}| (\le
                 n^2)$ is the size of ``affected areas'' in $ \Delta S$,
                 and in practice, $ |{\textsf {AFF}}| \ll n^2$. (3) We
                 also consider edge updates that accompany node
                 insertions, and categorize them into three cases,
                 according to which end of the inserted edge is a new
                 node. For each case, we devise an efficient incremental
                 algorithm that can support new node insertions and
                 accurately update the affected SimRanks. (4) We next
                 study batch updates for dynamical SimRank computation,
                 and design an efficient batch incremental method that
                 handles ``similar sink edges'' simultaneously and
                 eliminates redundant edge updates. (5) To achieve
                 linear memory, we devise a memory-efficient strategy
                 that dynamically updates all pairs of SimRanks column
                 by column in just $ O(K n + m)$ memory, without the
                  need to store all $ n^2 $ pairs of old SimRank scores.
                 Experimental studies on various datasets demonstrate
                 that our solution substantially outperforms the
                 existing incremental SimRank methods and is faster and
                 more memory-efficient than its competitors on
                 million-scale graphs.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
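
%%% For orientation, a minimal numpy sketch of the basic all-pairs
%%% SimRank iteration that the incremental methods above maintain
%%% under edge updates; W is the column-normalized adjacency matrix
%%% and c the decay factor (values illustrative):
%%%
%%%     import numpy as np
%%%
%%%     def simrank(A, c=0.6, iters=10):
%%%         """Naive all-pairs SimRank: S <- c * W^T S W with the
%%%         diagonal forced back to 1 after every iteration."""
%%%         indeg = A.sum(axis=0)
%%%         W = A / np.where(indeg == 0, 1.0, indeg)
%%%         S = np.eye(A.shape[0])
%%%         for _ in range(iters):
%%%             S = c * (W.T @ S @ W)
%%%             np.fill_diagonal(S, 1.0)  # s(v, v) = 1 by definition
%%%         return S
%%%
%%%     # toy usage: nodes 1 and 2 share the single in-neighbor 0
%%%     A = np.array([[0., 1., 1.],
%%%                   [0., 0., 0.],
%%%                   [0., 0., 0.]])
%%%     print(simrank(A)[1, 2])  # -> 0.6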

@Article{Sagi:2018:NBE,
  author =       "Tomer Sagi and Avigdor Gal",
  title =        "Non-binary evaluation measures for big data
                 integration",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "1",
  pages =        "105--126",
  month =        feb,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0489-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 6 18:41:42 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The evolution of data accumulation, management,
                 analytics, and visualization has led to the coining of
                 the term big data, which challenges the task of data
                 integration. This task, common to any matching problem
                 in computer science involves generating alignments
                 between structured data in an automated fashion.
                 Historically, set-based measures, based upon binary
                 similarity matrices (match/non-match), have dominated
                 evaluation practices of matching tasks. However, in the
                 presence of big data, such measures no longer suffice.
                 In this work, we propose evaluation methods for
                 non-binary matrices as well. Non-binary evaluation is
                 formally defined together with several new, non-binary
                 measures using a vector space representation of
                 matching outcome. We provide empirical analyses of the
                 usefulness of non-binary evaluation and show its
                 superiority over its binary counterparts in several
                 problem domains.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wu:2018:SOR,
  author =       "Yubao Wu and Xiang Zhang and Yuchen Bian and Zhipeng
                 Cai and Xiang Lian and Xueting Liao and Fengpan Zhao",
  title =        "Second-order random walk-based proximity measures in
                 graph analysis: formulations and algorithms",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "1",
  pages =        "127--152",
  month =        feb,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0490-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 6 18:41:42 MST 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Measuring the proximity between different nodes is a
                 fundamental problem in graph analysis. Random
                 walk-based proximity measures have been shown to be
                 effective and widely used. Most existing random walk
                 measures are based on the first-order Markov model,
                 i.e., they assume that the next step of the random
                 surfer only depends on the current node. However, this
                 assumption neither holds in many real-life applications
                 nor captures the clustering structure in the graph. To
                 address the limitation of the existing first-order
                 measures, in this paper, we study the second-order
                 random walk measures, which take the previously visited
                 node into consideration. While the existing first-order
                 measures are built on node-to-node transition
                 probabilities, in the second-order random walk, we need
                 to consider the edge-to-edge transition probabilities.
                 Using incidence matrices, we develop simple and elegant
                 matrix representations for the second-order proximity
                 measures. A desirable property of the developed
                 measures is that they degenerate to their original
                 first-order forms when the effect of the previous step
                 is zero. We further develop Monte Carlo methods to
                 efficiently compute the second-order measures and
                 provide theoretical performance guarantees.
                 Experimental results show that in a variety of
                 applications, the second-order measures can
                 dramatically improve the performance compared to their
                 first-order counterparts.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
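
%%% A minimal Python sketch of a second-order transition of the kind
%%% studied above, in the style of node2vec's return/in-out biases:
%%% the distribution over the next node depends on the edge
%%% (prev, cur), not on cur alone; p, q, and the weights are
%%% illustrative:
%%%
%%%     import random
%%%
%%%     def second_order_step(adj, prev, cur, p=2.0, q=0.5):
%%%         """Edge-to-edge transition: weight each neighbor x of cur
%%%         by its relation to the previously visited node prev."""
%%%         nbrs = list(adj[cur])
%%%         w = []
%%%         for x in nbrs:
%%%             if x == prev:
%%%                 w.append(1.0 / p)       # step back to prev
%%%             elif x in adj[prev]:
%%%                 w.append(1.0)           # stay near prev
%%%             else:
%%%                 w.append(1.0 / q)       # move outward
%%%         return random.choices(nbrs, weights=w)[0]
%%%
%%%     # toy usage: a few steps on a 4-cycle
%%%     adj = {0: {1, 3}, 1: {0, 2}, 2: {1, 3}, 3: {0, 2}}
%%%     prev, cur = 0, 1
%%%     for _ in range(5):
%%%         prev, cur = cur, second_order_step(adj, prev, cur)
%%%     print(prev, cur)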

@Article{Yang:2018:PPC,
  author =       "Bin Yang and Jian Dai and Chenjuan Guo and Christian
                 S. Jensen and Jilin Hu",
  title =        "{PACE}: a {PAth-CEntric} paradigm for stochastic path
                 finding",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "2",
  pages =        "153--178",
  month =        apr,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0491-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 24 08:39:19 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the growing volumes of vehicle trajectory data,
                 it becomes increasingly possible to capture
                 time-varying and uncertain travel costs, e.g., travel
                 time, in a road network. The current paradigm for doing
                 so is edge-centric: it represents a road network as a
                 weighted graph and splits trajectories into small
                 fragments that fit the underlying edges to assign
                 time-varying and uncertain weights to edges. It then
                 applies path finding algorithms to the resulting,
                 weighted graph. We propose a new PAth-CEntric paradigm,
                 PACE, that targets more accurate and more efficient
                 path cost estimation and path finding. By assigning
                 weights to paths, PACE avoids splitting trajectories
                 into small fragments. We solve two fundamental problems
                 to establish the PACE paradigm: (i) how to compute
                 accurately the travel cost distribution of a path and
                 (ii) how to conduct path finding for a
                  source--destination pair. To solve the first problem,
                 given a departure time and a query path, we show how to
                 select an optimal set of paths that cover the query
                 path and such that the weights of the paths enable the
                 most accurate joint cost distribution estimation for
                 the query path. The joint cost distribution models well
                 the travel cost dependencies among the edges in the
                 query path, which in turn enables accurate estimation
                 of the cost distribution of the query path. We solve
                 the second problem by showing that the resulting path
                 cost distribution estimation method satisfies an
                 incremental property that enables the method to be
                 integrated seamlessly into existing stochastic path
                 finding algorithms. Further, we propose a new
                 stochastic path finding algorithm that fully explores
                 the improved accuracy and efficiency provided by PACE.
                 Empirical studies with trajectory data from two
                 different cities offer insight into the design
                 properties of the PACE paradigm and offer evidence that
                 PACE is accurate, efficient, and effective in
                 real-world settings.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hu:2018:RAP,
  author =       "Jilin Hu and Bin Yang and Chenjuan Guo and Christian
                 S. Jensen",
  title =        "Risk-aware path selection with time-varying, uncertain
                 travel costs: a time series approach",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "2",
  pages =        "179--200",
  month =        apr,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0494-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 24 08:39:19 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We address the problem of choosing the best paths
                 among a set of candidate paths between the same
                  origin--destination pair. This functionality is used
                  extensively when constructing origin--destination
                 matrices in logistics and flex transportation. Because
                 the cost of a path, e.g., travel time, varies over time
                 and is uncertain, there is generally no single best
                 path. We partition time into intervals and represent
                 the cost of a path during an interval as a random
                 variable, resulting in an uncertain time series for
                 each path. When facing uncertainties, users generally
                 have different risk preferences, e.g., risk-loving or
                 risk-averse, and thus prefer different paths. We
                 develop techniques that, for each time interval, are
                 able to find paths with non-dominated lowest costs
                 while taking the users' risk preferences into account.
                 We represent risk by means of utility function
                 categories and show how the use of first-order and two
                 kinds of second-order stochastic dominance
                 relationships among random variables makes it possible
                 to find all paths with non-dominated lowest costs. We
                 report on empirical studies with large uncertain time
                 series collections derived from a 2-year GPS data set.
                 The study offers insight into the performance of the
                 proposed techniques, and it indicates that the best
                 techniques combine to offer an efficient and robust
                 solution.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
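
%%% A minimal numpy sketch of the first-order stochastic dominance
%%% test underlying the path comparison above, applied to empirical
%%% travel-cost samples (lower cost is better; sample handling is
%%% illustrative):
%%%
%%%     import numpy as np
%%%
%%%     def fsd_dominates(costs_a, costs_b):
%%%         """Path A first-order dominates path B iff
%%%         P(A <= x) >= P(B <= x) at every threshold x; for step
%%%         CDFs it suffices to check all observed sample values."""
%%%         a, b = np.sort(costs_a), np.sort(costs_b)
%%%         grid = np.union1d(a, b)
%%%         cdf_a = np.searchsorted(a, grid, side="right") / len(a)
%%%         cdf_b = np.searchsorted(b, grid, side="right") / len(b)
%%%         return bool(np.all(cdf_a >= cdf_b))
%%%
%%%     # toy usage: a uniformly faster path dominates
%%%     print(fsd_dominates(np.array([10., 12., 14.]),
%%%                         np.array([11., 13., 15.])))  # -> True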

@Article{Su:2018:PDP,
  author =       "Dong Su and Jianneng Cao and Ninghui Li and Min Lyu",
  title =        "{PrivPfC}: differentially private data publication for
                 classification",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "2",
  pages =        "201--223",
  month =        apr,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0492-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 24 08:39:19 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we tackle the problem of constructing a
                 differentially private synopsis for the classification
                 analysis. Several state-of-the-art methods follow the
                 structure of existing classification algorithms and are
                 all iterative, which is suboptimal due to the locally
                 optimal choices and division of the privacy budget
                 among many sequentially composed steps. We propose
                 PrivPfC, a new differentially private method for
                 releasing data for classification. The key idea
                 underlying PrivPfC is to privately select, in a single
                 step, a grid, which partitions the data domain into a
                 number of cells. This selection is done by using the
                 exponential mechanism with a novel quality function,
                 which maximizes the expected number of correctly
                 classified records by a histogram classifier. PrivPfC
                 supports both the binary classification and the
                 multiclass classification. Through extensive
                  experiments on real datasets, we demonstrate PrivPfC's
                 superiority over the state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
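
%%% PrivPfC's single private selection step uses the exponential
%%% mechanism.  A minimal numpy sketch of the generic mechanism; the
%%% paper's actual quality function scores candidate grids by expected
%%% histogram-classifier accuracy, whereas the candidates, quality,
%%% and sensitivity below are illustrative:
%%%
%%%     import numpy as np
%%%
%%%     def exponential_mechanism(cands, quality, eps, sens, seed=0):
%%%         """Sample a candidate with probability proportional to
%%%         exp(eps * quality / (2 * sensitivity))."""
%%%         rng = np.random.default_rng(seed)
%%%         scores = np.array([quality(c) for c in cands], dtype=float)
%%%         logits = eps * (scores - scores.max()) / (2.0 * sens)
%%%         probs = np.exp(logits)
%%%         probs /= probs.sum()
%%%         return cands[rng.choice(len(cands), p=probs)]
%%%
%%%     # toy usage: privately pick one of three candidate grids
%%%     grids = ["4x4", "8x8", "16x16"]
%%%     print(exponential_mechanism(grids, len, eps=1.0, sens=1.0))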

@Article{Zhang:2018:AKS,
  author =       "Dongxiang Zhang and Yuchen Li and Xin Cao and Jie Shao
                 and Heng Tao Shen",
  title =        "Augmented keyword search on spatial entity databases",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "2",
  pages =        "225--244",
  month =        apr,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0497-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 24 08:39:19 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we propose a new type of query that
                 augments the spatial keyword search with an additional
                 boolean expression constraint. The query is issued
                 against a corpus of structured or semi-structured
                 spatial entities and is very useful in applications
                 like mobile search and targeted location-aware
                 advertising. We devise three types of indexing and
                 filtering strategies. First, we utilize the hybrid
                 IR$^2$-tree and propose a novel hashing scheme for
                 efficient pruning. Second, we propose an inverted
                 index-based solution, named BE-Inv, that is more cache
                 conscious and exhibits great pruning power for boolean
                 expression matching. Our third method, named SKB-Inv,
                 adopts a novel two-level partitioning scheme to
                 organize the spatial entities into inverted lists and
                 effectively facilitate the pruning in the spatial,
                 textual, and boolean expression dimensions. In
                 addition, we propose an adaptive query processing
                 strategy that takes into account the selectivity of
                 query keywords and predicates for early termination. We
                 conduct our experiments using two real datasets with
                 3.5 million Foursquare venues and 50 million Twitter
                 geo-profiles. The results show that the methods based
                 on inverted indexes are superior to the hybrid
                 IR$^2$-tree; and SKB-Inv achieves the best
                 performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Deutch:2018:EPT,
  author =       "Daniel Deutch and Amir Gilad and Yuval Moskovitch",
  title =        "Efficient provenance tracking for datalog using
                 top-$k$ queries",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "2",
  pages =        "245--269",
  month =        apr,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0496-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 24 08:39:19 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Highly expressive declarative languages, such as
                 datalog, are now commonly used to model the operational
                 logic of data-intensive applications. The typical
                 complexity of such datalog programs, and the large
                 volume of data that they process, call for result
                 explanation. Results may be explained through the
                 tracking and presentation of data provenance, defined
                 here as the set of derivation trees of a given fact.
                 While informative, the size of such full provenance
                 information is typically too large and complex (even
                 when compactly represented) to allow displaying it to
                 the user. To this end, we propose a novel top-k query
                 language for querying datalog provenance, supporting
                 selection criteria based on tree patterns and ranking
                 based on the rules and database facts used in
                 derivation. We propose a novel, efficient algorithm
                 that computes, in polynomial data complexity, a compact
                 representation of the top-k trees, which may be
                 explicitly constructed in linear time with respect to
                 their size. We further experimentally study the
                 algorithm performance, showing its scalability even for
                 complex datalog programs where full provenance tracking
                 is infeasible.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhou:2018:ARQ,
  author =       "Junfeng Zhou and Jeffrey Xu Yu and Na Li and Hao Wei
                 and Ziyang Chen and Xian Tang",
  title =        "Accelerating reachability query processing based on {$
                 \vec {\rm DAG} $} reduction",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "2",
  pages =        "271--296",
  month =        apr,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0495-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Mar 24 08:39:19 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Answering reachability queries is one of the
                 fundamental graph operations. The existing approaches
                 build indexes and answer reachability queries on a
                 directed acyclic graph (DAG) GG, which is constructed
                 by coalescing each strongly connected component of the
                 given directed graph $ \mathcal {G} $ into a node of
                 GG. Considering that GG can still be large to be
                 processed efficiently, there are studies to further
                 reduce GG to a smaller graph. However, these approaches
                 suffer from either inefficiency in answering
                 reachability queries, or cannot scale to large graphs.
                 In this paper, we study DAG reduction to accelerate
                 reachability query processing, which reduces the size
                 of GG by computing transitive reduction (TR) followed
                 by computing equivalence reduction (ER). For TR, we
                 propose a bottom-up algorithm, namely buTR, which
                 removes from GG all redundant edges to get the unique
                 smallest DAGG$^{tGt}$ satisfying that G$^{tGt}$ has the
                 same transitive closure as that of GG. For ER, we
                 propose a divide-and-conquer algorithm, namely
                 linear-ER. Given the result G$^{tGt}$ of TR, linear-ER
                 gets a smaller DAGG$^\varepsilon $G in linear time
                 based on equivalence relationship between nodes in GG.
                 Our DAG reduction approaches (TR and ER) significantly
                 improve the cost of time and space and can be scaled to
                 large graphs. Based on the result of DAG reduction, we
                 further propose a graph decomposition-based algorithm
                 to efficiently answer reachability queries. We confirm
                 the efficiency of our approaches by extensive
                 experimental studies for TR, ER, and reachability query
                 processing using 20 real datasets. The complete source
                 code is available for download at
                 https://pan.baidu.com/s/1skHBXXN.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Belesiotis:2018:STU,
  author =       "Alexandros Belesiotis and Dimitrios Skoutas and
                 Christodoulos Efstathiades and Vassilis Kaffes and
                 Dieter Pfoser",
  title =        "Spatio-textual user matching and clustering based on
                 set similarity joins",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "3",
  pages =        "297--320",
  month =        jun,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0498-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Jun 8 17:24:12 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper addresses the problem of matching and
                 clustering users based on their geolocated posts.
                 Individual posts are matched according to spatial
                 distance and textual similarity thresholds. Then, user
                 similarity is defined as the ratio of their posts that
                 match each other. Based on these criteria, we introduce
                 efficient algorithms for identifying pairs of matching
                 users in a large dataset, as well as for computing the
                 top-k matching pairs. We then proceed to identify
                 spatio-textual user clusters. For this purpose, we use
                 the Louvain method for community detection. Our
                 algorithms operate on a user graph where edge weights
                 represent spatio-textual user similarities. Since the
                 exact user similarity graph can be prohibitively
                 expensive to compute, we exploit our previous
                 algorithms to derive efficient methods that reduce
                 execution time both by avoiding the computation of
                 exact similarity scores and by reducing the number of
                 similarity calculations performed. The presented
                 solution allows a trade-off between computation time
                 and quality of detected clusters. The proposed
                 algorithms are evaluated using three real-world
                 datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2018:GSG,
  author =       "Lei Li and Kai Zheng and Sibo Wang and Wen Hua and
                 Xiaofang Zhou",
  title =        "Go slow to go fast: minimal on-road time route
                 scheduling with parking facilities using historical
                 trajectory",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "3",
  pages =        "321--345",
  month =        jun,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0499-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Jun 8 17:24:12 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "For thousands of years, people have been innovating
                 new technologies to make their travel faster, the
                 latest of which is GPS technology that is used by
                 millions of drivers every day. The routes recommended
                 by a GPS device are computed by path planning
                 algorithms (e.g., fastest path algorithm), which aim to
                 minimize a certain objective function (e.g., travel
                 time) under the current traffic condition. When the
                 objective is to arrive at the destination as early as
                 possible, waiting during travel is not an option as it
                 will only increase the total travel time due to the
                 First-In-First-Out property of most road networks.
                 However, some businesses such as logistics companies
                 are more interested in optimizing the actual on-road
                 time of their vehicles (i.e., while the engine is
                 running) since it is directly related to the
                 operational cost. At the same time, the drivers'
                 trajectories, which can reveal the traffic conditions
                 on the roads, are also collected by various service
                 providers. Compared to the existing speed profile
                 generation methods, which mainly rely on traffic
                 monitor systems, the trajectory-based method can cover
                 a much larger space and is much cheaper and more
                 flexible to obtain. This paper proposes a system, which
                 has an
                 online component and an offline component, to solve the
                 minimal on-road time problem using the trajectories.
                 The online query answering component studies how
                 parking facilities along the route can be leveraged to
                 avoid predicted traffic jams and eventually reduce the
                 drivers' on-road time, while the offline component
                 solves how to generate speed profiles of a road network
                 from historical trajectories. The challenging part of
                 the routing problem of the online component lies in the
                 computational complexity of determining whether it is
                 beneficial to wait at specific parking places, and for
                 how long, to maximize the benefit. To cope with this
                 challenging problem, we propose two efficient
                 algorithms using a minimum on-road travel cost function
                 to answer the query. We further introduce several
                 approximation methods to speed up the query answering,
                 with an error bound guaranteed. The offline speed
                 profile generation component makes use of historical
                 trajectories to provide the traveling time for the
                 online component. Extensive experiments show that our
                 method is more efficient and accurate than baseline
                 approaches extended from the existing path planning
                 algorithms, and our speed profile is accurate and space
                 efficient.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yao:2018:SDT,
  author =       "Chang Yao and Meihui Zhang and Qian Lin and Beng Chin
                 Ooi and Jiatao Xu",
  title =        "Scaling distributed transaction processing and
                 recovery based on dependency logging",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "3",
  pages =        "347--368",
  month =        jun,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0500-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Jun 8 17:24:12 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Dependency graph-based concurrency control (DGCC)
                 protocol has been shown to achieve good performance on
                 multi-core in-memory systems. DGCC separates contention
                 resolution from the transaction execution and employs
                 dependency graphs to derive serializable transaction
                 schedules. However, distributed transactions complicate
                 the dependency resolution, and therefore, an effective
                 transaction partitioning strategy is essential to
                 reduce expensive multi-node distributed transactions.
                 During failure recovery, the log must be examined from
                 the last checkpoint onward, and the affected
                 transactions are re-executed based on the way they are
                 partitioned
                 and executed. Existing approaches treat both
                 transaction management and recovery as two separate
                 problems, even though recovery is dependent on the
                 sequence in which transactions are executed. In this
                 paper, we propose to treat the transaction management
                 and recovery problems as one. We first propose an
                 efficient distributed dependency graph-based
                 concurrency control (DistDGCC) protocol for handling
                 transactions spanning multiple nodes and propose a
                 novel and efficient logging protocol called dependency
                 logging that also makes use of dependency graphs for
                 efficient logging and recovery. DistDGCC optimizes the
                 average cost for each distributed transaction by
                 processing transactions in batches. Moreover, it also
                 reduces the effects of thread blocking caused by
                 distributed transactions and consequently improves the
                 runtime performance. Further, dependency logging
                 exploits the same data structure that is used by
                 DistDGCC to reduce the logging overhead, as well as the
                 logical dependency information to improve the recovery
                 parallelism. Extensive experiments are conducted to
                 evaluate the performance of our proposed technique
                 against state-of-the-art techniques. Experimental
                 results show that DistDGCC is efficient and scalable,
                 and dependency logging supports fast recovery with
                 marginal runtime overhead. Hence, the overall system
                 performance is significantly improved as a result.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chodpathumwan:2018:CEC,
  author =       "Yodsawalai Chodpathumwan and Ali Vakilian and Arash
                 Termehchy and Amir Nayyeri",
  title =        "Cost-effective conceptual design using taxonomies",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "3",
  pages =        "369--394",
  month =        jun,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0501-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Jun 8 17:24:12 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "It is known that annotating entities in unstructured
                 and semi-structured datasets by their concepts improves
                 the effectiveness of answering queries over these
                 datasets. Ideally, one would like to annotate entities
                 of all relevant concepts in a dataset. However, it
                 takes substantial time and computational resources to
                 annotate concepts in large datasets, and an
                 organization may have sufficient resources to annotate
                 only a subset of relevant concepts. Clearly, it would
                 like to annotate a subset of concepts that provides the
                 most effective answers to queries over the dataset. We
                 propose a formal framework that quantifies the amount
                 by which annotating entities of concepts from a
                 taxonomy in a dataset improves the effectiveness of
                 answering queries over the dataset. Because the problem
                 is $ \mathbf {NP} $-hard, we propose efficient
                 approximation and pseudo-polynomial time algorithms for
                 several cases of the problem. Our extensive empirical
                 studies validate our framework and show the accuracy
                 and efficiency of our algorithms.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Shang:2018:PTS,
  author =       "Shuo Shang and Lisi Chen and Zhewei Wei and Christian
                 S. Jensen and Kai Zheng and Panos Kalnis",
  title =        "Parallel trajectory similarity joins in spatial
                 networks",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "3",
  pages =        "395--420",
  month =        jun,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0502-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Jun 8 17:24:12 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The matching of similar pairs of objects, called
                 similarity join, is a fundamental functionality in data
                 management. We consider two cases of trajectory
                 similarity joins (TS-Joins), including a
                 threshold-based join (Tb-TS-Join) and a top-k TS-Join
                 (k-TS-Join), where the objects are trajectories of
                 vehicles moving in road networks. Given two sets of
                 trajectories and a threshold $ \theta $, the Tb-TS-Join
                 returns all pairs of trajectories from the two sets
                 with similarity above $ \theta $. In contrast, the
                 k-TS-Join does not take a threshold as a parameter, and
                 it returns the top-k most similar trajectory pairs from
                 the two sets. The TS-Joins target diverse applications
                 such as trajectory near-duplicate detection, data
                 cleaning, ridesharing recommendation, and traffic
                 congestion prediction. With these applications in mind,
                 we provide purposeful definitions of similarity. To
                 enable efficient processing of the TS-Joins on large
                 sets of trajectories, we develop search space pruning
                 techniques and enable use of the parallel processing
                 capabilities of modern processors. Specifically, we
                 present a two-phase divide-and-conquer search framework
                 that lays the foundation for the algorithms for the
                 Tb-TS-Join and the k-TS-Join that rely on different
                 pruning techniques to achieve efficiency. For each
                 trajectory, the algorithms first find similar
                 trajectories. Then they merge the results to obtain the
                 final result. The algorithms for the two joins exploit
                 different upper and lower bounds on the spatiotemporal
                 trajectory similarity and different heuristic
                 scheduling strategies for search space pruning. Their
                 per-trajectory searches are independent of each other
                 and can be performed in parallel, and the merging has
                 constant cost. An empirical study with real data offers
                 insight into the performance of the algorithms and
                 demonstrates that they are capable of outperforming
                 well-designed baseline algorithms by an order of
                 magnitude.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lee:2018:PRA,
  author =       "Juchang Lee and Wook-Shin Han and Hyoung Jun Na and
                 Chang Gyoo Park and Kyu Hwan Kim and Deok Hoe Kim and
                 Joo Yeon Lee and Sang Kyun Cha and Seunghyun Moon",
  title =        "Parallel replication across formats for scaling out
                 mixed {OLTP\slash OLAP} workloads in main-memory
                 databases",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "3",
  pages =        "421--444",
  month =        jun,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0503-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Jun 8 17:24:12 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Modern in-memory database systems are facing the need
                 of efficiently supporting mixed workloads of OLTP and
                 OLAP. A conventional approach to this requirement is to
                 rely on ETL-style, application-driven data replication
                 between two very different OLTP and OLAP systems,
                 sacrificing real-time reporting on operational data. An
                 alternative approach is to run OLTP and OLAP workloads
                 in a single machine, which eventually limits the
                 maximum scalability. In order to tackle this
                 challenging problem, we propose a novel database
                 replication architecture called HANA Asynchronous
                 Parallel Table Replication (ATR). ATR supports OLTP
                 workloads in one primary machine, while it supports
                 heavy OLAP workloads in replicas. Here, row store
                 formats can be used for OLTP transactions at the
                 primary, while column store formats are used for OLAP
                 analytical queries at the replicas. ATR is designed to
                 support elastic scalability of OLAP query performance,
                 while it minimizes the overhead for transaction
                 processing at the primary and minimizes CPU consumption
                 for replayed transactions at the replicas. ATR employs
                 a novel optimistic lock-free parallel log replay scheme
                 which exploits characteristics of multi-version
                 concurrency control (MVCC) to enable real-time
                 reporting by minimizing the propagation delay between
                 the primary and replicas. It supports adaptive query
                 routing depending on its predefined acceptable
                 staleness range. Through extensive experiments with a
                 concrete implementation available in a commercial
                 product, we demonstrate that ATR achieves sub-second
                 visibility delay even for update-intensive workloads,
                 providing scalable OLAP performance without notable
                 overhead to the primary. In addition, with extension of
                 ATR to eager parallel replication, we demonstrate how
                 the parallel log replay and its log-less replica
                 recovery mechanisms improve run-time transaction
                 performance under eager replication.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Choudhury:2018:FOL,
  author =       "Farhana Murtaza Choudhury and J. Shane Culpepper and
                 Zhifeng Bao and Timos Sellis",
  title =        "Finding the optimal location and keywords in
                 obstructed and unobstructed space",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "4",
  pages =        "445--470",
  month =        aug,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0504-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Sep 8 07:39:26 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The problem of optimal location selection based on
                 reverse $k$ nearest neighbor (R$k$NN) queries has been
                 extensively studied in spatial databases. In this work,
                 we present a related query, denoted as a Maximized
                 Bichromatic Reverse Spatial Textual $k$ Nearest Neighbor
                 (MaxST) query, that finds an optimal location and a set
                 of keywords for an object so that the object is a $k$NN
                 object for as many users as possible. Such a query has
                 many practical applications including advertisements,
                 where the query is to find the location and the text
                 contents to include in an advertisement so that it is
                 relevant to the maximum number of users. The visibility
                 of the advertisements also plays an important role in
                 the users' interests. In this work, we address two
                 instances of the spatial relevance when ranking items:
                 (1) the Euclidean distance and (2) the visibility. We
                 carefully design a series of index structures and
                 approaches to answer the MaxST for both instances.
                 Specifically, we present (1) the Grp-topk approach that
                 requires the computation of the top-k objects for all
                 of the users first and then applies various pruning
                 techniques to find the optimal location and keywords;
                 (2) the Indiv-U approach, where we use similarity
                 estimations to avoid computing the top-k objects of the
                 users that cannot be a final result; and (3) the
                 Index-U approach where we propose a hierarchical index
                 structure over the users to improve pruning. We show
                 that the keyword selection component in MaxST queries
                 is NP-hard and present both approximate and exact
                 solutions for the problem.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2018:ESC,
  author =       "Jianye Yang and Wenjie Zhang and Shiyu Yang and Ying
                 Zhang and Xuemin Lin and Long Yuan",
  title =        "Efficient set containment join",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "4",
  pages =        "471--495",
  month =        aug,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0505-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Sep 8 07:39:26 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "In this paper, we study the problem of set containment
                 join. Given two collections $ \mathcal {R} $ and $
                 \mathcal {S} $ of records, the set containment join $
                 \mathcal {R} \bowtie_\subseteq \mathcal {S} $ retrieves
                 all record pairs $ \{ (r, s) \} \in \mathcal {R} \times
                 \mathcal {S} $ such that $ r \subseteq s $. This
                 problem has been extensively studied in the
                 literature and has many important applications in
                 commercial and scientific fields. Recent research
                 focuses on the in-memory set containment join
                 algorithms, and several techniques have been developed
                 following intersection-oriented or union-oriented
                 computing paradigms. Nevertheless, we observe that both
                 computing paradigms have their limits due to the nature
                 of the intersection and union operators. Particularly,
                 the intersection-oriented method relies on the
                 intersection of the relevant inverted lists built on
                 the elements of
                 $ \mathcal {S} $. A nice property of the
                 intersection-oriented method is that the join
                 computation is verification free. However, the number
                 of records explored during the join process may be
                 large because there are multiple replicas for each
                 record in $ \mathcal {S} $. On the other hand, the
                 union-oriented method generates a signature for each
                 record in $ \mathcal {R} $ and the candidate pairs are
                 obtained by the union of the inverted lists of the
                 relevant signatures. The candidate size of the
                 union-oriented method is usually small because each
                 record contributes only one replica in the index.
                 Unfortunately, the union-oriented method needs to
                 verify the candidate pairs, which may be costly,
                 especially when the join result size is large. As a
                 matter of fact, the state-of-the-art union-oriented
                 solution is not competitive compared to the
                 intersection-oriented ones. In this paper, we propose a
                 new union-oriented method, namely TT-Join, which not
                 only enhances the advantage of the previous
                 union-oriented methods but also integrates the goodness
                 of intersection-oriented methods by imposing a variant
                 of prefix tree structure. We conduct extensive
                 experiments on 20 real-life datasets and synthetic
                 datasets by comparing our method with 7 existing
                 methods. The experiment results demonstrate that
                 TT-Join significantly outperforms the existing
                 algorithms on most of the datasets and can achieve up
                 to two orders of magnitude speedup. Furthermore, to
                 support large-scale datasets, we extend our
                 techniques to distributed systems on top of MapReduce
                 framework. With the help of carefully designed
                 load-aware distribution mechanisms, our distributed
                 join algorithm can achieve up to an order of magnitude
                 speedup over the baseline methods.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hao:2018:DRU,
  author =       "Shuang Hao and Nan Tang and Guoliang Li and Jian Li
                 and Jianhua Feng",
  title =        "Distilling relations using knowledge bases",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "4",
  pages =        "497--519",
  month =        aug,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0506-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Sep 8 07:39:26 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a relational table, we study the problem of
                 detecting and repairing erroneous data, as well as
                 marking correct data, using well curated knowledge
                 bases (KBs). We propose detective rules (DRs), a new
                 type of data cleaning rules that can make actionable
                 decisions on relational data, by building connections
                 between a relation and a KB. The main novelty is that
                 a DR simultaneously models two opposite semantics of an
                 attribute belonging to a relation using types and
                 relationships in a KB: The positive semantics explains
                 how its value should be linked to other attribute
                 values in a correct tuple, and the negative semantics
                 indicates how a wrong attribute value is connected to
                 other correct attribute values within the same tuple.
                 Naturally, a DR can mark correct values in a tuple if
                 it matches the positive semantics. Meanwhile, a DR can
                 detect/repair an error if it matches the negative
                 semantics. We study fundamental problems associated
                 with DRs, e.g., rule consistency and rule implication.
                 We present efficient algorithms to apply DRs to clean a
                 relation, based on rule order selection and inverted
                 indexes. Moreover, we discuss approaches on how to
                 generate DRs from examples. Extensive experiments,
                 using both real-world and synthetic datasets, verify
                 the effectiveness and efficiency of applying DRs in
                 practice.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Borovica-Gajic:2018:SSR,
  author =       "Renata Borovica-Gajic and Stratos Idreos and Anastasia
                 Ailamaki and Marcin Zukowski and Campbell Fraser",
  title =        "{Smooth Scan}: robust access path selection without
                 cardinality estimation",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "4",
  pages =        "521--545",
  month =        aug,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0507-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Sep 8 07:39:26 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Query optimizers depend heavily on statistics
                 representing column distributions to create good query
                 plans. In many cases, though, statistics are outdated
                 or nonexistent, and the process of refreshing
                 statistics is very expensive, especially for ad hoc
                 workloads on ever bigger data. This results in
                 suboptimal plans that severely hurt performance. The
                 core of the problem is the fixed decision on the type
                 of physical operators that comprise a query plan. This
                 paper makes a case for continuous adaptation and
                 morphing of physical operators throughout their
                 lifetime, by adjusting their behavior in accordance
                 with the observed statistical properties of the data at
                 run time. We demonstrate the benefits of the new
                 paradigm by designing and implementing an adaptive
                 access path operator called Smooth Scan, which morphs
                 continuously within the space of index access and full
                 table scan. Smooth Scan behaves similarly to an index
                 scan for low selectivity; if selectivity increases,
                 however, Smooth Scan progressively morphs its behavior
                 toward a sequential scan. As a result, a system with
                 Smooth Scan requires no optimization decisions on the
                 access paths up front. Additionally, by depending only
                 on the result distribution and eschewing statistics and
                 cardinality estimates altogether, Smooth Scan ensures
                 repeatable execution across multiple query invocations.
                 Smooth Scan implemented in PostgreSQL demonstrates
                 robust, near-optimal performance on micro-benchmarks
                 and real-life workloads, while remaining
                 statistics-oblivious.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Herrmann:2018:MSV,
  author =       "Kai Herrmann and Hannes Voigt and Torben Bach Pedersen
                 and Wolfgang Lehner",
  title =        "Multi-schema-version data management: data
                 independence in the twenty-first century",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "4",
  pages =        "547--571",
  month =        aug,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0508-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Sep 8 07:39:26 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Agile software development allows us to continuously
                 evolve and run a software system. However, this is not
                 possible in databases, as established methods are very
                 expensive, error-prone, and far from agile. We present
                 InVerDa, a multi-schema-version database management
                 system (MSVDB) for agile database development. MSVDBs
                 realize co-existing schema versions within one
                 database, where each schema version behaves like a
                 regular single-schema database and write operations are
                 propagated between schema versions. Developers use a
                 relationally complete and bidirectional database
                 evolution language (BiDEL) to easily evolve existing
                 schema versions to new ones. BiDEL scripts are more
                 robust, orders of magnitude shorter, and cause only a
                 small performance overhead compared to handwritten SQL
                 scripts. We formally guarantee data independence: no
                 matter how the data of the co-existing schema versions
                 is physically materialized, each schema version is
                 guaranteed to behave like a regular database. Since
                 the chosen physical materialization significantly
                 determines the overall performance, we equip database
                 administrators with an advisor that proposes an
                 optimized materialization for the current workload,
                 which can improve the performance by orders of
                 magnitude compared to na{\"\i}ve solutions. To our best
                 knowledge, we are the first to facilitate agile
                 evolution of production databases with full support of
                 co-existing schema versions and formally guaranteed
                 data independence.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Szlichta:2018:ECD,
  author =       "Jaroslaw Szlichta and Parke Godfrey and Lukasz Golab
                 and Mehdi Kargar and Divesh Srivastava",
  title =        "Effective and complete discovery of bidirectional
                 order dependencies via set-based axioms",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "4",
  pages =        "573--591",
  month =        aug,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0510-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Sep 8 07:39:26 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Integrity constraints (ICs) are useful for expressing
                 and enforcing application semantics. Formulating ICs
                 manually, however, requires domain expertise, is prone
                 to human error, and can be exceedingly time-consuming.
                 Thus, methods for automatic discovery have been
                 developed for some classes of ICs, such as functional
                 dependencies (FDs), and recently, order dependencies
                 (ODs). ODs properly subsume FDs and can express
                 business rules involving order; e.g., an employee who
                 pays higher taxes has a higher salary than another
                 employee. Bidirectional ODs further allow different
                 ordering directions, ascending and descending, as in
                 SQL's order-by; e.g., a student with an alphabetically
                 lower letter grade has a higher percentage grade than
                 another student. We address the limitations of prior
                 work on automatic OD discovery, which has factorial
                 complexity, is incomplete, and is not concise. We
                 present an efficient bidirectional OD discovery
                 algorithm enabled by a novel polynomial mapping to a
                 canonical form, and a sound and complete set of axioms
                 for canonical bidirectional ODs to prune the search
                 space. Our algorithm has exponential worst-case time
                 complexity in the number of attributes and linear
                 complexity in the number of tuples. We prove that it
                 produces a complete and minimal set of bidirectional
                 ODs, and we experimentally show orders of magnitude
                 performance improvements over the prior
                 state-of-the-art methodologies.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chaudhuri:2018:SIB,
  author =       "Surajit Chaudhuri and Jayant R. Haritsa",
  title =        "Special issue on best papers of {VLDB 2016}",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "5",
  pages =        "593--594",
  month =        oct,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0520-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Oct 4 06:40:44 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Interlandi:2018:ADP,
  author =       "Matteo Interlandi and Ari Ekmekji and Kshitij Shah and
                 Muhammad Ali Gulzar and Sai Deep Tetali and Miryung Kim
                 and Todd Millstein and Tyson Condie",
  title =        "Adding data provenance support to {Apache Spark}",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "5",
  pages =        "595--615",
  month =        oct,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0474-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Oct 4 06:40:44 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Debugging data processing logic in data-intensive
                 scalable computing (DISC) systems is a difficult and
                 time-consuming effort. Today's DISC systems offer very
                 little tooling for debugging programs, and as a result,
                 programmers spend countless hours collecting evidence
                 (e.g., from log files) and performing trial-and-error
                 debugging. To aid this effort, we built Titian, a
                 library that enables data provenance--tracking data
                 through transformations--in Apache Spark. Data
                 scientists using the Titian Spark extension will be
                 able to quickly identify the input data at the root
                 cause of a potential bug or outlier result. Titian is
                 built directly into the Spark platform and offers data
                 provenance support at interactive speeds--orders of
                 magnitude faster than alternative solutions--while
                 minimally impacting Spark job performance; observed
                 overheads for capturing data lineage rarely exceed 30\%
                 above the baseline job execution time.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Eich:2018:EGQ,
  author =       "Marius Eich and Pit Fender and Guido Moerkotte",
  title =        "Efficient generation of query plans containing
                 group-by, join, and groupjoin",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "5",
  pages =        "617--641",
  month =        oct,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0476-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Oct 4 06:40:44 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "It has been a recognized fact for many years that
                 query execution can benefit from pushing grouping
                 operators down in the operator tree and applying them
                 before a join. This so-called eager aggregation reduces
                 the size(s) of the join argument(s), making join
                 evaluation faster. Lately, the idea enjoyed a revival
                 when it was applied to outer joins for the first time
                 and incorporated in a state-of-the-art plan generator.
                 However, the recent approach is highly dependent on the
                 use of heuristics because of the exponential growth of
                 the search space that goes along with eager
                 aggregation. Finding an optimal solution for larger
                 queries calls for effective optimality-preserving
                 pruning mechanisms to reduce the search space size as
                 far as possible. By a more thorough investigation of
                 functional dependencies and keys, we provide a set of
                 new pruning criteria and extend the idea of eager
                 aggregation further by combining it with the
                 introduction of groupjoins. We evaluate the resulting
                 plan generator with respect to runtime and memory
                 consumption.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Leis:2018:QOT,
  author =       "Viktor Leis and Bernhard Radke and Andrey Gubichev and
                 Atanas Mirchev and Peter Boncz and Alfons Kemper and
                 Thomas Neumann",
  title =        "Query optimization through the looking glass, and what
                 we found running the {Join Order Benchmark}",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "5",
  pages =        "643--668",
  month =        oct,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0480-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Oct 4 06:40:44 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Finding a good join order is crucial for query
                 performance. In this paper, we introduce the Join Order
                 Benchmark that works on real-life data riddled with
                 correlations and introduces 113 complex join queries.
                 We experimentally revisit the main components in the
                 classic query optimizer architecture using a complex,
                 real-world data set and realistic multi-join queries.
                 For this purpose, we describe cardinality-estimate
                 injection and extraction techniques that allow us to
                 compare the cardinality estimators of multiple
                 industrial SQL implementations on equal footing, and to
                 characterize the value of having perfect cardinality
                 estimates. Our investigation shows that all
                 industrial-strength cardinality estimators routinely
                 produce large errors: though cardinality estimation
                 using table samples solves the problem for single-table
                 queries, there are still no techniques in industrial
                 systems that can deal accurately with join-crossing
                 correlated query predicates. We further show that while
                 estimates are essential for finding a good join order,
                 query performance is unsatisfactory if the query engine
                 relies too heavily on these estimates. Using another
                 set of experiments that measure the impact of the cost
                 model, we find that it has much less influence on query
                 performance than the cardinality estimates. We
                 investigate plan enumeration techniques comparing
                 exhaustive dynamic programming with heuristic
                 algorithms and find that exhaustive enumeration
                 improves performance despite the suboptimal cardinality
                 estimates. Finally, we extend our investigation from
                 main-memory only, to also include disk-based query
                 processing. Here, we find that though accurate
                 cardinality estimation should be the first priority,
                 other aspects such as modeling random versus sequential
                 I/O are also important to predict query runtime.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Makreshanski:2018:MQJ,
  author =       "Darko Makreshanski and Georgios Giannikis and Gustavo
                 Alonso and Donald Kossmann",
  title =        "Many-query join: efficient shared execution of
                 relational joins on modern hardware",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "5",
  pages =        "669--692",
  month =        oct,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0475-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Oct 4 06:40:44 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Database architectures typically process queries one
                 at a time, executing concurrent queries in independent
                 execution contexts. Often, such a design leads to
                 unpredictable performance and poor scalability. One
                 approach to circumvent the problem is to take advantage
                 of sharing opportunities across concurrently running
                 queries. In this paper, we propose many-query join
                 (MQJoin), a novel method for sharing the execution of a
                 join that can efficiently deal with hundreds of
                 concurrent queries. This is achieved by minimizing
                 redundant work and making efficient use of main-memory
                 bandwidth and multi-core architectures. Compared to
                 existing proposals, MQJoin is able to efficiently
                 handle larger workloads regardless of the schema by
                 exploiting more sharing opportunities. We also compared
                 MQJoin to two commercial main-memory column-store
                 databases. For a TPC-H-based workload, we show that
                 MQJoin provides 2--5 $\times$ higher throughput with
                 significantly more stable response times.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Brucato:2018:PQE,
  author =       "Matteo Brucato and Azza Abouzied and Alexandra
                 Meliou",
  title =        "Package queries: efficient and scalable computation of
                 high-order constraints",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "5",
  pages =        "693--718",
  month =        oct,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0483-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Oct 4 06:40:44 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Traditional database queries follow a simple model:
                 they define constraints that each tuple in the result
                 must satisfy. This model is computationally efficient,
                 as the database system can evaluate the query
                 conditions on each tuple individually. However, many
                 practical, real-world problems require a collection of
                 result tuples to satisfy constraints collectively,
                 rather than individually. In this paper, we present
                 package queries, a new query model that extends
                 traditional database queries to handle complex
                 constraints and preferences over answer sets. We
                 develop a full-fledged package query system,
                 implemented on top of a traditional database engine.
                 Our work makes several contributions. (1) We design
                 PaQL, a SQL-based query language that supports the
                 declarative specification of package queries. We prove
                 that PaQL is at least as expressive as integer linear
                 programming, and therefore, evaluation of package
                 queries is NP-hard. (2) We present a fundamental
                 evaluation strategy that combines the capabilities of
                 databases and constraint optimization solvers to derive
                 solutions to package queries. The core of our approach
                 is a set of translation rules that transform a package
                 query to an integer linear program. (3) We introduce an
                 offline data partitioning strategy allowing query
                 evaluation to scale to large data sizes. (4) We
                 introduce SketchRefine, a scalable algorithm for
                 package evaluation, with strong approximation
                 guarantees [a $ (1 \pm \varepsilon) $-factor
                 approximation]. (5) We present a method for
                 parallelizing the Refine phase of SketchRefine. (6) We
                 present an empirical study of the efficiency gains of
                 providing integer solvers with starting solutions. (7)
                 We present extensive experiments over real-world and
                 benchmark data. The results demonstrate that our
                 methods are effective at deriving high-quality package
                 results and achieve runtime performance that is an
                 order of magnitude faster than directly using ILP
                 solvers over large datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
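
%%% Editorial sketch for the entry above: the semantics of a package
%%% query, evaluated by brute force over all subsets (this exponential
%%% search space is exactly why the paper translates PaQL to integer
%%% linear programming instead).  The meal data are made up.
%%%
%%%   from itertools import combinations
%%%
%%%   meals = [  # (name, calories, protein)
%%%       ("oatmeal", 150, 6), ("eggs", 200, 12),
%%%       ("salad", 120, 4), ("chicken", 300, 28),
%%%   ]
%%%
%%%   # Package-level constraint: SUM(calories) <= 500;
%%%   # objective: MAXIMIZE SUM(protein).
%%%   best, best_protein = (), -1
%%%   for r in range(len(meals) + 1):
%%%       for pkg in combinations(meals, r):
%%%           if sum(m[1] for m in pkg) <= 500:
%%%               protein = sum(m[2] for m in pkg)
%%%               if protein > best_protein:
%%%                   best, best_protein = pkg, protein
%%%
%%%   print([m[0] for m in best], best_protein)  # ['eggs', 'chicken'] 40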

@Article{Elgohary:2018:CLA,
  author =       "Ahmed Elgohary and Matthias Boehm and Peter J. Haas
                 and Frederick R. Reiss and Berthold Reinwald",
  title =        "Compressed linear algebra for large-scale machine
                 learning",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "5",
  pages =        "719--744",
  month =        oct,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0478-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Oct 4 06:40:44 MDT 2018",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Large-scale machine learning algorithms are often
                 iterative, using repeated read-only data access and
                 I/O-bound matrix--vector multiplications to converge to
                 an optimal model. It is crucial for performance to fit
                 the data into single-node or distributed main memory
                 and enable fast matrix--vector operations on in-memory
                 data. General-purpose, heavy- and lightweight
                 compression techniques struggle to achieve both good
                 compression ratios and fast decompression speed to
                 enable block-wise uncompressed operations. Therefore,
                 we initiate work --- inspired by database compression
                 and sparse matrix formats --- on value-based compressed
                 linear algebra (CLA), in which heterogeneous,
                 lightweight database compression techniques are applied
                 to matrices, and then linear algebra operations such as
                 matrix--vector multiplication are executed directly on
                 the compressed representation. We contribute effective
                 column compression schemes, cache-conscious operations,
                 and an efficient sampling-based compression algorithm.
                 Our experiments show that CLA achieves in-memory
                 operations performance close to the uncompressed case
                 and good compression ratios, which enables fitting
                 substantially larger datasets into available memory. We
                 thereby obtain significant end-to-end performance
                 improvements up to $ 9.2 \times $.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
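
%%% Editorial sketch for the entry above (a simplification, not the
%%% paper's code): one value-based column-compression idea (group each
%%% column's rows by distinct value) and a matrix--vector multiply run
%%% directly on the compressed form, so that each distinct value is
%%% multiplied only once.
%%%
%%%   def compress_column(col):
%%%       """Map each distinct nonzero value to its row offsets."""
%%%       groups = {}
%%%       for row, val in enumerate(col):
%%%           if val != 0.0:
%%%               groups.setdefault(val, []).append(row)
%%%       return groups
%%%
%%%   def matvec(compressed_cols, v, n_rows):
%%%       """y = X @ v over the compressed columns of X."""
%%%       y = [0.0] * n_rows
%%%       for j, groups in enumerate(compressed_cols):
%%%           for val, rows in groups.items():
%%%               contrib = val * v[j]   # once per distinct value
%%%               for r in rows:
%%%                   y[r] += contrib
%%%       return y
%%%
%%%   X = [[1.0, 2.0], [1.0, 0.0], [1.0, 2.0]]  # repeated values compress
%%%   cols = [compress_column([X[i][j] for i in range(3)])
%%%           for j in range(2)]
%%%   print(matvec(cols, [10.0, 1.0], 3))       # [12.0, 10.0, 12.0]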

@Article{Chai:2018:POB,
  author =       "Chengliang Chai and Guoliang Li and Jian Li and Dong
                 Deng and Jianhua Feng",
  title =        "A partial-order-based framework for cost-effective
                 crowdsourced entity resolution",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "6",
  pages =        "745--770",
  month =        dec,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0509-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Crowdsourced entity resolution has recently attracted
                 significant attentions because it can harness the
                 wisdom of crowd to improve the quality of entity
                 resolution. However, existing techniques either cannot
                 achieve high quality or incur huge monetary costs. To
                 address these problems, we propose a cost-effective
                 crowdsourced entity resolution framework, which
                 significantly reduces the monetary cost while keeping
                 high quality. We first define a partial order on the
                 pairs of records. Then, we select a pair as a question
                 and ask the crowd to check whether the records in the
                 pair refer to the same entity. After getting the answer
                 of this pair, we infer the answers of other pairs based
                 on the partial order. Next, we iteratively select pairs
                 without answers to ask until we get the answers of all
                 pairs. We devise effective algorithms to judiciously
                 select the pairs to ask in order to minimize the number
                 of asked pairs. To further reduce the cost, we propose
                 a grouping technique to group the pairs and we only ask
                 one pair instead of all pairs in each group. We develop
                 error-tolerant techniques to tolerate the errors
                 introduced by the partial order and the crowd. We also
                 study the budget-aware entity resolution, which, given
                 a budget, finds the maximum number of matching pairs
                 within the budget, and propose effective optimization
                 techniques. Experimental results show that our method
                 reduces the cost to 1.25\% of that of existing
                 approaches (equivalently, existing approaches incur $
                 80 \times $ the monetary cost of our method) while not
                 sacrificing quality.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
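
%%% Editorial sketch for the entry above, under a simplified reading:
%%% record pairs carry attribute-similarity vectors, the partial order
%%% is componentwise domination, and one crowd answer is propagated to
%%% every comparable unlabeled pair (match upward, non-match downward).
%%% All pair ids and similarity values are hypothetical.
%%%
%%%   def dominates(p, q):
%%%       return all(a >= b for a, b in zip(p, q))
%%%
%%%   def propagate(pairs, labels, asked, answer):
%%%       """Record the crowd's answer for `asked`, infer the rest."""
%%%       labels[asked] = answer
%%%       for pid, sim in pairs.items():
%%%           if pid in labels:
%%%               continue
%%%           if answer and dominates(sim, pairs[asked]):
%%%               labels[pid] = True    # more similar than a known match
%%%           elif not answer and dominates(pairs[asked], sim):
%%%               labels[pid] = False   # less similar than a non-match
%%%
%%%   pairs = {"a": (0.9, 0.8), "b": (0.7, 0.6), "c": (0.4, 0.9)}
%%%   labels = {}
%%%   propagate(pairs, labels, "b", True)  # crowd: pair "b" is a match
%%%   print(labels)                        # {'b': True, 'a': True}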

@Article{Roblot:2018:PCC,
  author =       "Tania Roblot and Miika Hannula and Sebastian Link",
  title =        "Probabilistic Cardinality Constraints",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "6",
  pages =        "771--795",
  month =        dec,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0511-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Probabilistic databases address the requirements of
                 applications that produce large collections of
                 uncertain data. They should provide declarative means
                 to control the integrity of data. Cardinality
                 constraints, in particular, control the occurrences of
                 data patterns by declaring in how many records a
                 combination of data values can occur. We propose
                 cardinality constraints on probabilistic data, which
                 stipulate lower bounds on the marginal probability by
                 which a cardinality constraint holds. We investigate
                 limits and opportunities for automating their use in
                 integrity control. This includes hardness results for
                 their validation, axiomatic and efficient algorithmic
                 characterisations of their implication problem, and an
                 algorithm that computes succinct semantic summaries for
                 any collection of these constraints. Experiments
                 complement our theoretical analysis on the time and
                 space complexity of computing semantic summaries,
                 suggesting that their computation provides the basis to
                 acquire meaningful constraints. We also establish
                 evidence that probabilistic functional and inclusion
                 dependencies cannot be managed as simply as
                 probabilistic cardinality constraints.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
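
%%% Editorial sketch for the entry above: the semantics of a
%%% probabilistic cardinality constraint, checked the naive way over an
%%% explicitly enumerated set of possible worlds.  The worlds and
%%% probabilities are toy values; the paper shows validation is hard in
%%% general.
%%%
%%%   from collections import Counter
%%%
%%%   worlds = [  # (probability, values of attribute `city`)
%%%       (0.5, ["NYC", "NYC", "SF"]),
%%%       (0.3, ["NYC", "NYC", "NYC"]),
%%%       (0.2, ["SF"]),
%%%   ]
%%%
%%%   def card_at_most(rows, b):
%%%       """Does every city value occur in at most b records?"""
%%%       return max(Counter(rows).values()) <= b
%%%
%%%   marginal = sum(p for p, rows in worlds if card_at_most(rows, 2))
%%%   print(marginal)  # 0.7: constraint (card <= 2, p >= 0.7) holds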

@Article{Bress:2018:GCC,
  author =       "Sebastian Bre{\ss} and Bastian K{\"o}cher and Henning
                 Funke and Steffen Zeuch and Tilmann Rabl and Volker
                 Markl",
  title =        "Generating custom code for efficient query execution
                 on heterogeneous processors",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "6",
  pages =        "797--822",
  month =        dec,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0512-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Processor manufacturers build increasingly specialized
                 processors to mitigate the effects of the power wall in
                 order to deliver improved performance. Currently,
                 database engines have to be manually optimized for each
                 processor, which is a costly and error-prone process.
                 In this paper, we propose concepts to adapt to and to
                 exploit the performance enhancements of modern
                 processors automatically. Our core idea is to create
                 processor-specific code variants and to learn a
                 well-performing code variant for each processor. These
                 code variants leverage various parallelization
                 strategies and apply both generic and
                 processor-specific code transformations. Our
                 experimental results show that the performance of code
                 variants may diverge by up to two orders of magnitude. In
                 order to achieve peak performance, we generate custom
                 code for each processor. We show that our approach
                 finds an efficient custom code variant for multi-core
                 CPUs, GPUs, and MICs.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zoumpatianos:2018:GDS,
  author =       "Kostas Zoumpatianos and Yin Lou and Ioana Ileana and
                 Themis Palpanas and Johannes Gehrke",
  title =        "Generating data series query workloads",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "6",
  pages =        "823--846",
  month =        dec,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0513-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Data series (including time series) has attracted lots
                 of interest in recent years. Most of the research has
                 focused on how to efficiently support similarity or
                 nearest neighbor queries over large data series
                 collections (an important data mining task), and
                 several data series summarization and indexing methods
                 have been proposed in order to solve this problem. Up
                 to this point, very little attention has been paid to
                 properly evaluating such index structures, with most
                 previous works relying solely on randomly selected data
                 series to use as queries. In this work, we show that
                 random workloads are inherently not suitable for the
                 task at hand and we argue that there is a need for
                 carefully generating query workloads. We define
                 measures that capture the characteristics of queries,
                 and we propose a method for generating workloads with
                 the desired properties, that is, effectively evaluating
                 and comparing data series summarizations and indexes.
                 In our experimental evaluation, with carefully
                 controlled query workloads, we shed light on key
                 factors affecting the performance of nearest neighbor
                 search in large data series collections. This is the
                 first paper that introduces a method for quantifying
                 hardness of data series queries, as well as the ability
                 to generate queries of predefined hardness.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{To:2018:SSM,
  author =       "Quoc-Cuong To and Juan Soto and Volker Markl",
  title =        "A survey of state management in big data processing
                 systems",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "6",
  pages =        "847--872",
  month =        dec,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0514-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The concept of state and its applications vary widely
                 across big data processing systems. This is evident in
                 both the research literature and existing systems, such
                 as Apache Flink, Apache Heron, Apache Samza, Apache
                 Spark, and Apache Storm. Given the pivotal role that
                 state management plays, particularly, for iterative
                 batch and stream processing, in this survey, we present
                 examples of state as an enabler, discuss the
                 alternative approaches used to handle and implement
                 state, capture the many facets of state management, and
                 highlight new research directions. Our aim is to
                 provide insight into disparate state management
                 techniques, motivate others to pursue research in this
                 area, and draw attention to open problems.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Liu:2018:ACE,
  author =       "Yuchen Liu and Hai Liu and Dongqing Xiao and Mohamed
                 Y. Eltabakh",
  title =        "Adaptive correlation exploitation in big data query
                 optimization",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "6",
  pages =        "873--898",
  month =        dec,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0515-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Correlations among the data attributes are abundant
                 and inherent in most application domains. These
                 correlations, if managed in systematic and efficient
                 ways, would enable various optimization opportunities.
                 Unfortunately, state-of-the-art techniques are all
                 heavily tailored toward optimizing factors intrinsic to
                 relational databases, e.g., predicate selectivity,
                 random I/O accesses, and secondary indexes, which are
                 mostly not applicable to the modern big data
                 infrastructures, e.g., Hadoop and Spark. In this paper,
                 we propose the EXORD$^{++}$ system for exploiting the
                 data's correlations in big data query optimization.
                 EXORD$^{++}$ supports two types of correlations: hard
                 (which do not allow for exceptions) and soft (which
                 allow for exceptions). We introduce a three-phase
                 approach for managing soft correlations: (1)
                 validating and judging the worthiness of soft
                 correlations, (2) selecting and preparing the soft
                 correlations for deployment, and (3) exploiting the
                 correlations in query optimization. EXORD$^{++}$
                 introduces a novel cost-benefit model for adaptively
                 selecting the most beneficial soft correlations given
                 a query workload. We show that this problem is NP-hard
                 and propose a heuristic to solve it efficiently in
                 polynomial time. Moreover, we present incremental
                 maintenance algorithms for efficiently updating the
                 system's state under data appends and workload
                 changes. The EXORD$^{++}$ prototype is implemented as
                 an extension to the Hive engine on top of Hadoop. The
                 experimental evaluation shows the potential of
                 EXORD$^{++}$ to achieve more than $ 10 \times $
                 speedup while introducing minimal storage overheads.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
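
%%% Editorial sketch for the entry above (illustrative, not EXORD++
%%% code): judging whether a candidate correlation city -> state is
%%% usable as a *soft* correlation by measuring its exception ratio.
%%% The 10% acceptance threshold and the records are hypothetical.
%%%
%%%   from collections import Counter, defaultdict
%%%
%%%   def exception_ratio(rows, x, y):
%%%       """Fraction of rows violating the dominant y for their x."""
%%%       by_x = defaultdict(Counter)
%%%       for r in rows:
%%%           by_x[r[x]][r[y]] += 1
%%%       exc = sum(sum(c.values()) - max(c.values())
%%%                 for c in by_x.values())
%%%       return exc / len(rows)
%%%
%%%   rows = [
%%%       {"city": "NYC", "state": "NY"},
%%%       {"city": "NYC", "state": "NY"},
%%%       {"city": "NYC", "state": "CA"},  # dirty record: the exception
%%%       {"city": "SF",  "state": "CA"},
%%%   ]
%%%   ratio = exception_ratio(rows, "city", "state")
%%%   print(ratio, "soft" if ratio <= 0.1 else "reject")  # 0.25 reject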

@Article{Wang:2018:EEM,
  author =       "Tianzheng Wang and Ryan Johnson and Alan Fekete and
                 Ippokratis Pandis",
  title =        "Erratum to: {Efficiently making (almost) any
                 concurrency control mechanism serializable}",
  journal =      j-VLDB-J,
  volume =       "27",
  number =       "6",
  pages =        "899--900",
  month =        dec,
  year =         "2018",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-017-0471-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Wang:2017:EMA}.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Rahman:2019:OGF,
  author =       "Habibur Rahman and Senjuti Basu Roy and Saravanan
                 Thirumuruganathan and Sihem Amer-Yahia and Gautam Das",
  title =        "Optimized group formation for solving collaborative
                 tasks",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "1",
  pages =        "1--23",
  month =        feb,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0516-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Many popular applications, such as collaborative
                 document editing, sentence translation, or citizen
                 science, resort to collaborative crowdsourcing, a
                 special form of human-based computing, where crowd
                 workers with appropriate skills and expertise are
                 required to form groups to solve complex tasks. While
                 there has been extensive research on workers' task
                 assignment for traditional microtask-based
                 crowdsourcing, such work often ignores the critical
                 aspect of collaboration. Central to any collaborative
                 crowdsourcing process is the aspect of solving
                 collaborative tasks that requires successful
                 collaboration among the workers. Our formalism
                 considers two main collaboration-related
                 factors--affinity and upper critical
                 mass--appropriately adapted from organizational science
                 and social theories. Our contributions are threefold.
                 First, we formalize the notion of collaboration among
                 crowd workers and propose a comprehensive optimization
                 model for task assignment in a collaborative
                 crowdsourcing environment. Next, we study the hardness
                 of the task assignment optimization problem and propose
                 a series of efficient exact and approximation
                 algorithms with provable theoretical guarantees.
                 Finally, we present a detailed set of experimental
                 results stemming from two real-world collaborative
                 crowdsourcing applications using Amazon Mechanical
                 Turk.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wu:2019:VFS,
  author =       "Zhiqiang Wu and Kenli Li",
  title =        "{VBTree}: forward secure conjunctive queries over
                 encrypted data for cloud computing",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "1",
  pages =        "25--46",
  month =        feb,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0517-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This paper concerns the fundamental problem of
                 processing conjunctive keyword queries over an
                 outsourced data table on untrusted public clouds in a
                 privacy-preserving manner. The data table can be
                 properly implemented with tree-based searchable
                 symmetric encryption schemes, such as the well-known
                 Keyword Red--Black tree and the Indistinguishable
                 Bloom-filter Tree in ICDE'17. However, these trees
                 still have many limitations in supporting sub-linear
                 time updates. One reason is that their tree branches
                 are directly exposed to the cloud. To achieve efficient
                 conjunctive queries while supporting dynamic updates,
                 we introduce a novel tree data structure called virtual
                 binary tree (VBTree). Our key design is to organize
                 indexing elements into the VBTree in a top-down
                 fashion, without storing any tree branches and tree
                 nodes. The tree only exists in a logical view, and all
                 of the elements are actually stored in a hash table. To
                 achieve forward privacy, which is discussed by Bost in
                 CCS'16, we also propose a storage mechanism called
                 version control repository (VCR), to record and control
                 versions of keywords and queries. VCR requires less
                 client-side storage than other forward-private
                 schemes. With our proposed approach, data elements can
                 be quickly searched while the index can be privately
                 updated. The security of the VBTree is formally proved
                 under the IND-CKA2 model. We test our scheme on a real
                 e-mail dataset and a user location dataset. The testing
                 results demonstrate its high efficiency and scalability
                 in both searching and updating processes.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lee:2019:PFP,
  author =       "Seokki Lee and Bertram Lud{\"a}scher and Boris
                 Glavic",
  title =        "{PUG}: a framework and practical implementation for
                 why and why-not provenance",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "1",
  pages =        "47--71",
  month =        feb,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0518-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Explaining why an answer is (or is not) returned by a
                 query is important for many applications including
                 auditing, debugging data and queries, and answering
                 hypothetical questions about data. In this work, we
                 present the first practical approach for answering such
                 questions for queries with negation (first-order
                 queries). Specifically, we introduce a graph-based
                 provenance model that, while syntactic in nature,
                 supports reverse reasoning and is proven to encode a
                 wide range of provenance models from the literature.
                 The implementation of this model in our PUG (Provenance
                 Unification through Graphs) system takes a provenance
                 question and Datalog query as an input and generates a
                 Datalog program that computes an explanation, i.e., the
                 part of the provenance that is relevant to answer the
                 question. Furthermore, we demonstrate how a desirable
                 factorization of provenance can be achieved by
                 rewriting an input query. We experimentally evaluate
                 our approach demonstrating its efficiency.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2019:SSS,
  author =       "Wenlu Wang and Ji Zhang and Min-Te Sun and Wei-Shinn
                 Ku",
  title =        "A scalable spatial skyline evaluation system utilizing
                 parallel independent region groups",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "1",
  pages =        "73--98",
  month =        feb,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0519-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "This research presents two parallel solutions to
                 efficiently address spatial skyline queries. First, we
                 propose a novel concept called independent regions for
                 parallelizing the process of spatial skyline
                 evaluation. Spatial skyline candidates in an
                 independent region do not depend on any data point in
                 other independent regions. Then, we propose a GPU-based
                 solution. We use a multi-level independent region
                 group-based parallel filter to support efficient
                 multi-threaded elimination of spatial skyline
                 non-candidates. Beyond that, we propose comparable regions
                 to accelerate non-candidate elimination in each
                 independent region. Second, we propose a
                 MapReduce-based solution. We generate the convex hull
                 of query points in the first MapReduce phase. In the
                 second phase, we calculate independent regions based on
                 the input data points and the convex hull of the query
                 points. With the independent regions, spatial skylines
                 are evaluated in parallel in the third phase, in which
                 data points are partitioned by their associated
                 independent regions in map functions, and spatial
                 skyline candidates are calculated by reduce functions.
                 The results of the spatial skyline queries are the
                 union of outputs from the reduce functions. Our
                 experimental results show that the GPU multi-threading
                 scheme is very efficient on small-scale input datasets.
                 In contrast, the MapReduce scheme performs very well on
                 large-scale input datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
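
%%% Editorial sketch for the entry above: the brute-force spatial
%%% skyline that the paper's independent-region schemes parallelize.
%%% A point is dominated if some other point is no farther from every
%%% query point and strictly closer to at least one.  The coordinates
%%% are toy values.
%%%
%%%   import math
%%%
%%%   def dist(a, b):
%%%       return math.hypot(a[0] - b[0], a[1] - b[1])
%%%
%%%   def spatial_skyline(points, queries):
%%%       def dominated(p):
%%%           return any(
%%%               all(dist(o, q) <= dist(p, q) for q in queries) and
%%%               any(dist(o, q) < dist(p, q) for q in queries)
%%%               for o in points if o != p)
%%%       return [p for p in points if not dominated(p)]
%%%
%%%   pts = [(0, 0), (1, 1), (5, 5), (1, 0)]
%%%   print(spatial_skyline(pts, [(0, 0), (2, 2)]))
%%%   # [(0, 0), (1, 1), (1, 0)]: (5, 5) is dominated by (1, 1)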

@Article{Wang:2019:APS,
  author =       "Yue Wang and Lei Chen and Yulin Che and Qiong Luo",
  title =        "Accelerating pairwise {SimRank} estimation over static
                 and dynamic graphs",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "1",
  pages =        "99--122",
  month =        feb,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0521-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Measuring similarities among different vertices is a
                 fundamental problem in graph analysis. Among different
                 similarity measurements, SimRank is one of the most
                 promising and popular. In reality, instead of computing
                 the whole similarity matrix, people often issue SimRank
                 queries in a pairwise manner, each of which needs to
                 estimate an approximate SimRank value within a
                 specified accuracy for a given pair of nodes. These
                 pairwise SimRank queries are often processed on
                 real-life graphs, which typically evolve over time,
                 requiring efficient algorithms that can query pairwise
                 SimRank under dynamic graph updates. However, current
                 single-pair SimRank solutions are either static or
                 inefficient in handling dynamic cases with good-quality
                 results. Observing that the sample size is the major
                 factor that determines the efficiency and the accuracy
                 in Monte Carlo methods to estimate pairwise SimRank, in
                 this paper, we propose three algorithms to query
                 pairwise SimRank over static and dynamic graphs
                 efficiently, by using different sample reduction
                 strategies. The accuracy of our algorithms is
                 guaranteed by the different invariants we propose for
                 pairwise SimRank. We show that our algorithms
                 outperform the state-of-the-art static and dynamic
                 solutions for pairwise SimRank estimation.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
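
%%% Editorial sketch for the entry above: the classic Monte Carlo
%%% estimator that such sample-reduction techniques start from.
%%% SimRank(a, b) is the expectation of c^t, where t is the first
%%% meeting step of two coupled reverse random walks; graph and
%%% parameters below are toy values.
%%%
%%%   import random
%%%
%%%   def simrank_mc(in_nbrs, a, b, c=0.6, walks=10000, max_len=20):
%%%       hits = 0.0
%%%       for _ in range(walks):
%%%           u, v = a, b
%%%           for step in range(1, max_len + 1):
%%%               if not in_nbrs.get(u) or not in_nbrs.get(v):
%%%                   break                  # a walk has nowhere to go
%%%               u = random.choice(in_nbrs[u])
%%%               v = random.choice(in_nbrs[v])
%%%               if u == v:                 # walks meet at this step
%%%                   hits += c ** step
%%%                   break
%%%       return hits / walks
%%%
%%%   g = {1: [3], 2: [3], 3: [1, 2]}        # in-neighbor lists
%%%   print(round(simrank_mc(g, 1, 2), 3))   # 0.6: 1, 2 share parent 3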

@Article{Zhao:2019:EMC,
  author =       "Kaiqi Zhao and Gao Cong and Jin-Yao Chin and Rong
                 Wen",
  title =        "Exploring market competition over topics in
                 spatio-temporal document collections",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "1",
  pages =        "123--145",
  month =        feb,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0522-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Feb 5 08:07:20 MST 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "With the prominence of location-based services and
                 social networks in recent years, huge amounts of
                 spatio-temporal document collections (e.g., geo-tagged
                 tweets) have been generated. These data collections
                 often reflect users' opinions on different products and thus
                 are helpful for business owners to explore hot topics
                 of their brands and their competition with other
                 brands in different spatial regions during different
                 periods. In this work, we aim to mine the topics and
                 the market competition of different brands over each
                 topic for a category of business (e.g., coffeehouses)
                 from spatio-temporal documents within a user-specified
                 region and time period. To support such spatio-temporal
                 search online in an exploratory manner, we propose a
                 novel framework equipped by (1) a generative model for
                 mining topics and market competition, (2) an
                 Octree-based off-line pre-training method for the model
                 and (3) an efficient algorithm for combining
                 pre-trained models to return the topics and market
                 competition on each topic within a user-specified pair
                 of region and time span. Extensive experiments show
                 that our framework is able to improve the runtime by up
                 to an order of magnitude compared with baselines while
                 achieving similar model quality in terms of training
                 log-likelihood.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Picado:2019:LSE,
  author =       "Jose Picado and Arash Termehchy and Alan Fern and
                 Parisa Ataei",
  title =        "Logical scalability and efficiency of relational
                 learning algorithms",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "2",
  pages =        "147--171",
  month =        apr,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0523-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon May 20 17:17:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Relational learning algorithms learn the definition of
                 a new relation in terms of existing relations in the
                 database. The same database may be represented under
                 different schemas for various reasons, such as
                 efficiency, data quality, and usability. Unfortunately,
                 the output of current relational learning algorithms
                 tends to vary quite substantially over the choice of
                 schema, both in terms of learning accuracy and
                 efficiency. We introduce the property of schema
                 independence of relational learning algorithms, and
                 study both the theoretical and empirical dependence of
                 existing algorithms on the common class of
                 (de)composition schema transformations. We show
                 theoretically and empirically that current relational
                 learning algorithms are generally not schema
                 independent. We propose Castor, a relational learning
                 algorithm that achieves schema independence.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Aluc:2019:BSC,
  author =       "G{\"u}nes Alu{\c{c}} and M. Tamer {\"O}zsu and
                 Khuzaima Daudjee",
  title =        "Building self-clustering {RDF} databases using
                 {Tunable-LSH}",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "2",
  pages =        "173--195",
  month =        apr,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0530-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon May 20 17:17:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The Resource Description Framework (RDF) is a W3C
                 standard for representing graph-structured data, and
                 SPARQL is the standard query language for RDF. Recent
                 advances in information extraction, linked data
                 management and the Semantic Web have led to a rapid
                 increase in both the volume and the variety of RDF data
                 that are publicly available. As businesses start to
                 capitalize on RDF data, RDF data management systems are
                 being exposed to workloads that are far more diverse
                 and dynamic than what they were designed to handle.
                 Consequently, there is a growing need for developing
                 workload-adaptive and self-tuning RDF data management
                 systems. To realize this objective, we introduce a fast
                 and efficient method for dynamically clustering records
                 in an RDF data management system. Specifically, we
                 assume nothing about the workload upfront, but as
                 SPARQL queries are executed, we keep track of records
                 that are co-accessed by the queries in the workload and
                 physically cluster them. To decide dynamically and in
                 constant-time where a record needs to be placed in the
                 storage system, we develop a new locality-sensitive
                 hashing (LSH) scheme, Tunable-LSH. Using Tunable-LSH,
                 records that are co-accessed across similar sets of
                 queries can be hashed to the same or nearby physical
                 pages in the storage system. What sets Tunable-LSH
                 apart from existing LSH schemes is that it can
                 auto-tune to achieve the aforementioned clustering
                 objective with high accuracy even when the workloads
                 change. Experimental evaluation of Tunable-LSH in an
                 RDF data management system as well as in a standalone
                 hashtable shows end-to-end performance gains over
                 existing solutions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
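
%%% Editorial sketch for the entry above: a generic, *untuned*
%%% locality-sensitive signature (random hyperplanes, SimHash-style)
%%% over per-record query-access vectors, only to show the clustering
%%% idea that records co-accessed by similar query sets tend to share
%%% a bucket (page).  Tunable-LSH itself goes further and auto-tunes
%%% the hash to the observed workload; everything below is made up.
%%%
%%%   import random
%%%
%%%   random.seed(7)
%%%   N_QUERIES, BITS = 8, 3
%%%   planes = [[random.gauss(0, 1) for _ in range(N_QUERIES)]
%%%             for _ in range(BITS)]
%%%
%%%   def bucket(access_vec):
%%%       """One sign bit per hyperplane gives the bucket id."""
%%%       return tuple(int(sum(p * x for p, x in zip(pl, access_vec)) >= 0)
%%%                    for pl in planes)
%%%
%%%   r1 = [1, 1, 0, 0, 1, 0, 0, 0]  # accessed by queries 0, 1, 4
%%%   r2 = [1, 1, 0, 0, 1, 0, 0, 1]  # near-identical access pattern
%%%   print(bucket(r1), bucket(r2))  # likely equal: co-clustered pages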

@Article{Zhou:2019:RTC,
  author =       "Xiangmin Zhou and Dong Qin and Lei Chen and Yanchun
                 Zhang",
  title =        "Real-time context-aware social media recommendation",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "2",
  pages =        "197--219",
  month =        apr,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0524-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon May 20 17:17:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Social media recommendation has attracted great
                 attention due to its wide applications in online
                 advertising, entertainment, etc. Since contexts
                 highly affect social user preferences, great effort has
                 been put into context-aware recommendation in recent
                 years. However, existing techniques cannot capture the
                 optimal context information that is most discriminative
                 and compact from a large number of available features
                 flexibly for effective and efficient context-aware
                 social recommendation. To address this issue, we
                 propose a generic framework for context-aware
                 recommendation in shared communities, which exploits
                 the characteristics of media content and contexts.
                 Specifically, we first propose a novel approach based
                 on the correlation between a feature and a group of
                 other ones for selecting the optimal features used in
                 recommendation, which fully removes the redundancy.
                 Then, we propose a graph-based model called
                 content--context interaction graph, by analysing the
                 metadata content and social contexts, and the
                 interaction between attributes. Finally, we design a
                 hash-based index over Apache Storm for organizing and
                 searching the media database in real time. Extensive
                 experiments have been conducted over large real media
                 collections to prove the high effectiveness and
                 efficiency of our proposed framework.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ntaflos:2019:UAB,
  author =       "Lefteris Ntaflos and George Trimponias and Dimitris
                 Papadias",
  title =        "A unified agent-based framework for constrained graph
                 partitioning",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "2",
  pages =        "221--241",
  month =        apr,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0526-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon May 20 17:17:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Social networks offer various services such as
                 recommendations of social events, or delivery of
                 targeted advertising material to certain users. In this
                 work, we focus on a specific type of services modeled
                 as constrained graph partitioning (CGP). CGP assigns
                 users of a social network to a set of classes with
                 bounded capacities so that the similarity and the
                 social costs are minimized. The similarity cost is
                 proportional to the dissimilarity between a user and
                 his class, whereas the social cost is measured in terms
                 of friends that are assigned to different classes. In
                 this work, we investigate two solutions for CGP. The
                 first utilizes a game-theoretic framework, where each
                 user constitutes a player that wishes to minimize his
                 own social and similarity cost. The second employs
                 local search, and aims at minimizing the global cost.
                 We show that the two approaches can be unified under a
                 common agent-based framework that allows for two types
                 of deviations. In a unilateral deviation, an agent
                 switches to a new class, whereas in a bilateral
                 deviation a pair of agents exchange their classes. We
                 develop a number of optimization techniques to improve
                 result quality and facilitate efficiency. Our
                 experimental evaluation on real datasets demonstrates
                 that the proposed methods always outperform the state
                 of the art in terms of solution quality, while they are
                 up to an order of magnitude faster.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
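
%%% Editorial sketch for the entry above: the unilateral-deviation
%%% move of the agent-based framework.  An agent's cost is its
%%% dissimilarity to its class plus the number of friends in other
%%% classes; agents keep switching to cheaper classes with spare
%%% capacity until no move improves.  (Bilateral swaps are omitted;
%%% all data are toy values.)
%%%
%%%   def agent_cost(u, cls, assign, dissim, friends):
%%%       return dissim[u][cls] + sum(1 for f in friends[u]
%%%                                   if assign[f] != cls)
%%%
%%%   def local_search(assign, dissim, friends, capacity):
%%%       changed = True
%%%       while changed:
%%%           changed = False
%%%           for u in assign:
%%%               here = agent_cost(u, assign[u], assign, dissim, friends)
%%%               for cls in range(len(capacity)):
%%%                   used = sum(1 for v in assign if assign[v] == cls)
%%%                   if cls == assign[u] or used >= capacity[cls]:
%%%                       continue
%%%                   if agent_cost(u, cls, assign, dissim, friends) < here:
%%%                       assign[u] = cls   # improving unilateral move
%%%                       changed = True
%%%                       break
%%%       return assign
%%%
%%%   dissim = {0: [0, 2], 1: [0, 2], 2: [2, 0]}  # per-class cost
%%%   friends = {0: [1], 1: [0], 2: []}
%%%   print(local_search({0: 1, 1: 0, 2: 0}, dissim, friends, [2, 2]))
%%%   # {0: 0, 1: 0, 2: 1}: friends 0 and 1 end up together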

@Article{Omidvar-Tehrani:2019:UGA,
  author =       "Behrooz Omidvar-Tehrani and Sihem Amer-Yahia and Ria
                 Mae Borromeo",
  title =        "User group analytics: hypothesis generation and
                 exploratory analysis of user data",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "2",
  pages =        "243--266",
  month =        apr,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0527-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon May 20 17:17:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "User data is becoming increasingly available in
                 multiple domains ranging from the social Web to retail
                 store receipts. User data is described by user
                 demographics (e.g., age, gender, occupation) and user
                 actions (e.g., rating a movie, publishing a paper,
                 following a medical treatment). The analysis of user
                 data is appealing to scientists who work on population
                 studies, online marketing, recommendations, and
                 large-scale data analytics. User data analytics usually
                 relies on identifying group-level behavior such as
                 ``Asian women who publish regularly in databases.''
                 Group analytics addresses peculiarities of user data
                 such as noise and sparsity to enable insights. In this
                 paper, we introduce a framework for user group
                 analytics by developing several components which cover
                 the life cycle of user groups. We provide two different
                 analytical environments to support ``hypothesis
                 generation'' and ``exploratory analysis'' on user
                 groups. Experiments on datasets with different
                 characteristics show the usability and efficiency of
                 our group analytics framework.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2019:LSR,
  author =       "Xubo Wang and Lu Qin and Xuemin Lin and Ying Zhang and
                 Lijun Chang",
  title =        "Leveraging set relations in exact and dynamic set
                 similarity join",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "2",
  pages =        "267--292",
  month =        apr,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0529-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon May 20 17:17:01 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Set similarity join, which finds all the similar set
                 pairs from two collections of sets, is a fundamental
                 problem with a wide range of applications. Existing
                 works study both exact set similarity join and
                 approximate similarity join problems. In this paper, we
                 focus on the exact set similarity join problem. The
                 existing solutions for exact set similarity join follow
                 a filtering-verification framework, which generates a
                 list of candidate pairs through scanning indexes in the
                 filtering phase and reports those similar pairs in the
                 verification phase. Though much research has been
                 conducted on this problem, set relations have not been
                 well studied on improving the algorithm efficiency
                 through computational cost sharing. Therefore, in this
                 paper, we explore the set relations in different levels
                 to reduce the overall computational cost. First, it has
                 been shown that most of the computational time is spent
                 on the filtering phase, which can be quadratic in the
                 number of sets in the worst case for the existing
                 solutions. Thus, we explore index-level set relations
                 to reduce the filtering cost while keeping the same
                 filtering power. We achieve this by grouping related
                 sets into blocks in the index and skipping useless
                 index probes in joins. Second, we explore answer-level
                 set relations to further improve the algorithm based on
                 the intuition that if two sets are similar, their
                 answers may have a large overlap. We derive an
                 algorithm which incrementally generates the answer of
                 one set from an already computed answer of another
                 similar set rather than computing the answer from scratch
                 to reduce the computational cost. In addition,
                 considering that in real applications, the data are
                 usually updated dynamically, we extend our techniques
                 and design efficient algorithms to incrementally update
                 the join result when any element in the sets is
                 updated. Finally, we conduct extensive performance
                 studies using 21 real datasets with various data
                 properties from a wide range of domains. The
                 experimental results demonstrate that our algorithm
                 outperforms all the existing algorithms across all
                 datasets.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}
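
%%% Editorial sketch for the entry above: the standard
%%% filtering-verification skeleton (prefix filter for a Jaccard
%%% threshold t, then exact verification) that the paper's index-level
%%% and answer-level cost sharing build upon; shown as a self-join for
%%% brevity, with made-up sets.
%%%
%%%   import math
%%%   from collections import defaultdict
%%%
%%%   def jaccard(a, b):
%%%       return len(a & b) / len(a | b)
%%%
%%%   def ssjoin(sets, t):
%%%       freq = defaultdict(int)       # global token frequencies
%%%       for s in sets:
%%%           for tok in s:
%%%               freq[tok] += 1
%%%       canon = [sorted(s, key=lambda x: (freq[x], x)) for s in sets]
%%%       index, results = defaultdict(set), set()
%%%       for i, toks in enumerate(canon):
%%%           prefix = len(toks) - math.ceil(t * len(toks)) + 1
%%%           cands = set()
%%%           for tok in toks[:prefix]:  # filtering phase
%%%               cands |= index[tok]
%%%               index[tok].add(i)
%%%           for j in cands:            # verification phase
%%%               if jaccard(sets[i], sets[j]) >= t:
%%%                   results.add((j, i))
%%%       return results
%%%
%%%   S = [{"a", "b", "c"}, {"a", "b", "d"}, {"x", "y"}]
%%%   print(ssjoin(S, 0.4))  # {(0, 1)}: Jaccard = 2/4 = 0.5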

@Article{Amer-Yahia:2019:TID,
  author =       "Sihem Amer-Yahia and Lei Chen and Ren{\'e}e J.
                 Miller",
  title =        "Thematic issue on data management for graphs",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "3",
  pages =        "293--294",
  month =        jun,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00543-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 2 07:30:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cebiric:2019:SSG,
  author =       "{\v{S}}ejla Cebiri{\'c} and Fran{\c{c}}ois
                 Goasdou{\'e} and Haridimos Kondylakis and Dimitris
                 Kotzinos and Ioana Manolescu and Georgia Troullinou and
                 Mussab Zneika",
  title =        "Summarizing semantic graphs: a survey",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "3",
  pages =        "295--327",
  month =        jun,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0528-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 2 07:30:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "The explosion in the amount of the available RDF data
                 has led to the need to explore, query and understand
                 such data sources. Due to the complex structure of RDF
                 graphs and their heterogeneity, the exploration and
                 understanding tasks are significantly harder than in
                 relational databases, where the schema can serve as a
                 first step toward understanding the structure.
                 Summarization has been applied to RDF data to
                 facilitate these tasks. Its purpose is to extract
                 concise and meaningful information from RDF knowledge
                 bases, representing their content as faithfully as
                 possible. There is no single concept of RDF summary,
                 and not a single but many approaches to build such
                 summaries; each is better suited for some uses, and
                 each presents specific challenges with respect to its
                 construction. This is the first comprehensive survey
                 of summarization methods for
                 semantic RDF graphs. We propose a taxonomy of existing
                 works in this area, including also some closely related
                 works developed prior to the adoption of RDF in the
                 data management community; we present the concepts at
                 the core of each approach and outline their main
                 technical aspects and implementation. We hope the
                 survey will help readers understand this scientifically
                 rich area and identify the most pertinent summarization
                 method for a variety of usage scenarios.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Demirci:2019:CAP,
  author =       "Gunduz Vehbi Demirci and Hakan Ferhatosmanoglu and
                 Cevdet Aykanat",
  title =        "Cascade-aware partitioning of large graph databases",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "3",
  pages =        "329--350",
  month =        jun,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0531-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 2 07:30:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Graph partitioning is an essential task for scalable
                 data management and analysis. The current partitioning
                 methods utilize the structure of the graph, and the
                 query log if available. Some queries performed on the
                 database may trigger further operations. For example,
                 the query workload of a social network application may
                 contain re-sharing operations in the form of cascades.
                 It is beneficial to include the potential cascades in
                 the graph partitioning objectives. In this paper, we
                 introduce the problem of cascade-aware graph
                 partitioning that aims to minimize the overall cost of
                 communication among parts/servers during cascade
                 processes. We develop a randomized solution that
                 estimates the underlying cascades, and use it as an
                 input for the partitioning of large-scale graphs.
                 Experiments on 17 real social networks demonstrate the
                 effectiveness of the proposed solution in terms of the
                 partitioning objectives.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Das:2019:IMM,
  author =       "Apurba Das and Michael Svendsen and Srikanta
                 Tirthapura",
  title =        "Incremental maintenance of maximal cliques in a
                 dynamic graph",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "3",
  pages =        "351--375",
  month =        jun,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00540-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 2 07:30:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "We consider the maintenance of the set of all maximal
                 cliques in a dynamic graph that is changing through the
                 addition or deletion of edges. We present nearly tight
                 bounds on the magnitude of change in the set of maximal
                 cliques when edges are added to the graph, as well as
                 the first change-sensitive algorithm for incremental
                 clique maintenance under edge additions, whose runtime
                 is proportional to the magnitude of the change in the
                 set of maximal cliques, when the number of edges added
                 is small. Our algorithm can also be applied to the
                 decremental case, when edges are deleted from the
                 graph. We present experimental results showing these
                 algorithms are efficient in practice and are faster
                 than prior work by two to three orders of magnitude.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wen:2019:ESG,
  author =       "Dong Wen and Lu Qin and Ying Zhang and Lijun Chang and
                 Xuemin Lin",
  title =        "Efficient structural graph clustering: an index-based
                 approach",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "3",
  pages =        "377--399",
  month =        jun,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00541-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 2 07:30:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Graph clustering is a fundamental problem widely
                 applied in many applications. The structural graph
                 clustering (\mathsf {SCAN}SCAN) method obtains not only
                 clusters but also hubs and outliers. However, the
                 clustering results heavily depend on two parameters,
                 \epsilon \in and \mu \mu , while the optimal parameter
                 setting depends on different graph properties and
                 various user requirements. In addition, all existing
                 \mathsf {SCAN}SCAN solutions need to scan at least the
                 whole graph, even if only a small number of vertices
                 belong to clusters. In this paper, we propose an
                 index-based method for \mathsf {SCAN}SCAN. Based on our
                 index, we cluster the graph for any \epsilon \in and
                 \mu \mu in O(\sum _{C\in \mathbb {C}}|E_C|)O(?C?C|EC|)
                 time, where \mathbb {C} C is the result set of all
                 clusters and |E_C||EC| is the number of edges in a
                 specific cluster CC. In other words, the time spent on
                 computing structural clustering depends only on the
                 result size, not on the size of the original graph. Our
                 index's space complexity is O(m), where m is the number
                 of edges in the graph. To handle dynamic graph updates,
                 we propose algorithms and several optimization
                 techniques for maintaining our index. We also design an
                 index for I/O efficient query processing. We conduct
                 extensive experiments to evaluate the performance of
                 all our proposed algorithms on 10 real-world networks,
                 with the largest one containing more than 1 billion
                 edges. The experimental results demonstrate that our
                 approaches significantly outperform existing
                 solutions.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yu:2019:SES,
  author =       "Weiren Yu and Xuemin Lin and Wenjie Zhang and Jian Pei
                 and Julie A. Mccann",
  title =        "{SimRank*}: effective and scalable pairwise similarity
                 search based on graph topology",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "3",
  pages =        "401--426",
  month =        jun,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0536-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Wed Oct 2 07:30:39 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  abstract =     "Given a graph, how can we quantify similarity between
                 two nodes in an effective and scalable way? SimRank is
                 an attractive measure of pairwise similarity based on
                 graph topologies. Its underpinning philosophy that
                 ``two nodes are similar if they are pointed to (have
                 incoming edges) from similar nodes'' can be regarded as
                 an aggregation of similarities based on incoming paths.
                 Despite its popularity in various applications (e.g.,
                 web search and social networks), SimRank has an
                 undesirable trait, i.e., ``zero-similarity'': it
                 accommodates only the paths of equal length from a
                 common ``center'' node, whereas a large portion of
                 other paths are fully ignored. In this paper, we
                 propose an effective and scalable similarity model,
                 SimRank*, to remedy this problem. (1) We first provide
                 a sufficient and necessary condition of the
                 ``zero-similarity'' problem that exists in Jeh and
                 Widom's SimRank model, Li et al. 's SimRank model,
                 Random Walk with Restart (RWR), and ASCOS++. (2) We
                 next present our treatment, SimRank*, which can resolve
                 this issue while inheriting the merit of the simple
                 SimRank philosophy. (3) We reduce the series form of
                 SimRank* to a closed form, which looks simpler than
                 SimRank but which enriches semantics without suffering
                 from increased computational overhead. This leads to an
                 iterative form of SimRank*, which requires O(Knm) time
                 and O(n^2)O(n2) memory for computing all (n^2)(n2)
                 pairs of similarities on a graph of n nodes and m edges
                 for K iterations. (4) To improve the computational time
                 of SimRank* further, we leverage a novel clustering
                 strategy via edge concentration. Due to its
                 NP-hardness, we devise an efficient heuristic to speed
                 up all-pairs SimRank* computation to
                 O(Kn{\tilde{m}})O(Knm~) time, where {\tilde{m}}m~ is
                 generally much smaller than m. (5) To scale SimRank* on
                 billion-edge graphs, we propose two memory-efficient
                 single-source algorithms, i.e., ss-gSR* for geometric
                 SimRank*, and ss-eSR* for exponential SimRank*, which
                 can retrieve similarities between all n nodes and a
                 given query on an as-needed basis. This significantly
                 reduces the O(n^2)O(n2) memory of all-pairs search to
                 either O(Kn + {\tilde{m}})O(Kn+m~) for geometric
                 SimRank*, or O(n + {\tilde{m}})O(n+m~) for exponential
                 SimRank*, without any loss of accuracy, where
                 {\tilde{m}} \ll n^2m~?n2. (6) We also compare SimRank*
                 with another remedy of SimRank that adds self-loops on
                 each node and demonstrate that SimRank* is more
                 effective. (7) Using real and synthetic datasets, we
                 empirically verify the richer semantics of SimRank*,
                 and validate its high computational efficiency and
                 scalability on large graphs with billions of edges.",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Valdes:2019:FEM,
  author =       "Fabio Vald{\'e}s and Ralf Hartmut G{\"u}ting",
  title =        "A framework for efficient multi-attribute movement
                 data analysis",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "4",
  pages =        "427--449",
  month =        aug,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0525-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-018-0525-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Psaropoulos:2019:ICS,
  author =       "Georgios Psaropoulos and Thomas Legler and Norman May
                 and Anastasia Ailamaki",
  title =        "Interleaving with coroutines: a systematic and
                 practical approach to hide memory latency in index
                 joins",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "4",
  pages =        "451--471",
  month =        aug,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0533-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-018-0533-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bronselaer:2019:CRT,
  author =       "Antoon Bronselaer and Christophe Billiet and Robin {De
                 Mol} and Joachim Nielandt and Guy {De Tr{\'e}}",
  title =        "Compact representations of temporal databases",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "4",
  pages =        "473--496",
  month =        aug,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0535-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-018-0535-4",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Baazizi:2019:PSI,
  author =       "Mohamed-Amine Baazizi and Dario Colazzo and Giorgio
                 Ghelli and Carlo Sartiani",
  title =        "Parametric schema inference for massive {JSON}
                 datasets",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "4",
  pages =        "497--521",
  month =        aug,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0532-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-018-0532-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2019:SCG,
  author =       "Yuan Li and Ahmed Eldawy and Jie Xue and Nadezda
                 Knorozova and Mohamed F. Mokbel and Ravi Janardan",
  title =        "Scalable computational geometry in {MapReduce}",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "4",
  pages =        "523--548",
  month =        aug,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-018-0534-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-018-0534-5",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Park:2019:FSM,
  author =       "Namyong Park and Sejoon Oh and U Kang",
  title =        "Fast and scalable method for distributed {Boolean}
                 tensor factorization",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "4",
  pages =        "549--574",
  month =        aug,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00538-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00538-z",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Frazzetto:2019:PAS,
  author =       "Davide Frazzetto and Thomas Dyhre Nielsen and Torben
                 Bach Pedersen and Laurynas {\v{S}}ik{\v{s}}nys",
  title =        "Prescriptive analytics: a survey of emerging trends
                 and technologies",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "4",
  pages =        "575--595",
  month =        aug,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00539-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00539-y",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhu:2019:FDC,
  author =       "Rong Zhu and Zhaonian Zou and Jianzhong Li",
  title =        "Fast diversified coherent core search on multi-layer
                 graphs",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "4",
  pages =        "597--622",
  month =        aug,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00542-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00542-3",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{AlHasanHaldar:2019:LPL,
  author =       "Nur {Al Hasan Haldar} and Jianxin Li and Mark Reynolds
                 and Timos Sellis and Jeffrey Xu Yu",
  title =        "Location prediction in large-scale social networks: an
                 in-depth benchmarking study",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "5",
  pages =        "623--648",
  month =        oct,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00553-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00553-0",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fang:2019:EPM,
  author =       "Yixiang Fang and Yun Li and Reynold Cheng and Nikos
                 Mamoulis and Gao Cong",
  title =        "Evaluating pattern matching queries for spatial
                 databases",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "5",
  pages =        "649--673",
  month =        oct,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00550-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00550-3",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kim:2019:ETD,
  author =       "Jinhyun Kim and Jun-Ki Min and Kyuseok Shim",
  title =        "Efficient two-dimensional {Haar$^+$} synopsis
                 construction for the maximum absolute error measure",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "5",
  pages =        "675--701",
  month =        oct,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00551-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00551-2",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Theocharidis:2019:SEM,
  author =       "Konstantinos Theocharidis and John Liagouris and Nikos
                 Mamoulis and Panagiotis Bouros and Manolis Terrovitis",
  title =        "{SRX}: efficient management of spatial {RDF} data",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "5",
  pages =        "703--733",
  month =        oct,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00554-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00554-z",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2019:FAS,
  author =       "Tong Yang and Jie Jiang and Yang Zhou and Long He and
                 Jinyang Li and Bin Cui and Steve Uhlig and Xiaoming
                 Li",
  title =        "Fast and accurate stream processing by filtering the
                 cold",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "5",
  pages =        "735--763",
  month =        oct,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00560-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00560-1",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2019:ESW,
  author =       "Wentao Li and Miao Qiao and Lu Qin and Ying Zhang and
                 Lijun Chang and Xuemin Lin",
  title =        "Eccentricities on small-world networks",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "5",
  pages =        "765--792",
  month =        oct,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00566-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00566-9",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Affolter:2019:CSR,
  author =       "Katrin Affolter and Kurt Stockinger and Abraham
                 Bernstein",
  title =        "A comparative survey of recent natural language
                 interfaces for databases",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "5",
  pages =        "793--819",
  month =        oct,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00567-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00567-8;
                 http://link.springer.com/content/pdf/10.1007/s00778-019-00567-8.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Cheng:2019:PCD,
  author =       "Zhinan Cheng and Qun Huang and Patrick P. C. Lee",
  title =        "On the performance and convergence of distributed
                 stream processing via approximate fault tolerance",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "5",
  pages =        "821--846",
  month =        oct,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00565-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00565-w",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kondylakis:2019:CSS,
  author =       "Haridimos Kondylakis and Niv Dayan and Kostas
                 Zoumpatianos and Themis Palpanas",
  title =        "{Coconut}: sortable summarizations for scalable
                 indexes over static and streaming data series",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "6",
  pages =        "847--869",
  month =        dec,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00573-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00573-w",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2019:EDR,
  author =       "Tianming Zhang and Yunjun Gao and Lu Chen and Wei Guo
                 and Shiliang Pu and Baihua Zheng and Christian S.
                 Jensen",
  title =        "Efficient distributed reachability querying of massive
                 temporal graphs",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "6",
  pages =        "871--896",
  month =        dec,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00572-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00572-x",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lin:2019:OPT,
  author =       "Xuelian Lin and Jiahao Jiang and Shuai Ma and Yimeng
                 Zuo and Chunming Hu",
  title =        "One-pass trajectory simplification using the
                 synchronous {Euclidean} distance",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "6",
  pages =        "897--921",
  month =        dec,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00575-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00575-8",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2019:PAS,
  author =       "Runhui Wang and Sibo Wang and Xiaofang Zhou",
  title =        "Parallelizing approximate single-source personalized
                 {PageRank} queries on shared memory",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "6",
  pages =        "923--940",
  month =        dec,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00576-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00576-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Langdale:2019:PGJ,
  author =       "Geoff Langdale and Daniel Lemire",
  title =        "Parsing gigabytes of {JSON} per second",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "6",
  pages =        "941--960",
  month =        dec,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00578-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00578-5",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ren:2019:SQI,
  author =       "Weilong Ren and Xiang Lian and Kambiz Ghazinour",
  title =        "Skyline queries over incomplete data streams",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "6",
  pages =        "961--985",
  month =        dec,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00577-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00577-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2019:ECD,
  author =       "Fan Zhang and Xuemin Lin and Ying Zhang and Lu Qin and
                 Wenjie Zhang",
  title =        "Efficient community discovery with user engagement and
                 similarity",
  journal =      j-VLDB-J,
  volume =       "28",
  number =       "6",
  pages =        "987--1012",
  month =        dec,
  year =         "2019",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00579-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:21 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00579-4",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2020:VSS,
  author =       "Lei Chen and Sihem Amer-Yahia",
  title =        "{VLDB SI} survey editorial",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "1--2",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00598-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00598-1;
                 http://link.springer.com/content/pdf/10.1007/s00778-019-00598-1.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Su:2020:STD,
  author =       "Han Su and Shuncheng Liu and Bolong Zheng and Xiaofang
                 Zhou and Kai Zheng",
  title =        "A survey of trajectory distance measures and
                 performance evaluation",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "3--32",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00574-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00574-9",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fang:2020:MDA,
  author =       "Jian Fang and Yvo T. B. Mulder and Jan Hidders and
                 Jinho Lee and H. Peter Hofstee",
  title =        "In-memory database acceleration on {FPGAs}: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "33--59",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00581-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00581-w;
                 http://link.springer.com/content/pdf/10.1007/s00778-019-00581-w.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Malliaros:2020:CDN,
  author =       "Fragkiskos D. Malliaros and Christos Giatsidis and
                 Apostolos N. Papadopoulos and Michalis Vazirgiannis",
  title =        "The core decomposition of networks: theory, algorithms
                 and applications",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "61--92",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00587-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00587-4",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Qin:2020:MDV,
  author =       "Xuedi Qin and Yuyu Luo and Nan Tang and Guoliang Li",
  title =        "Making data visualization more efficient and
                 effective: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "93--117",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00588-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00588-3",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Rahman:2020:EID,
  author =       "Protiva Rahman and Lilong Jiang and Arnab Nandi",
  title =        "Evaluating interactive data systems",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "119--146",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00589-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00589-2",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Xie:2020:ESR,
  author =       "Min Xie and Raymond Chi-Wing Wong and Ashwin Lall",
  title =        "An experimental survey of regret minimization query
                 and variants: bridging the best worlds between top-$k$
                 query and skyline query",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "147--175",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00570-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00570-z",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Magdy:2020:MDM,
  author =       "Amr Magdy and Laila Abdelhafeez and Yunfan Kang and
                 Eric Ong and Mohamed F. Mokbel",
  title =        "Microblogs data management: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "177--216",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00569-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00569-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tong:2020:SCS,
  author =       "Yongxin Tong and Zimu Zhou and Yuxiang Zeng and Lei
                 Chen and Cyrus Shahabi",
  title =        "Spatial crowdsourcing: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "217--250",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00568-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00568-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chapman:2020:DSS,
  author =       "Adriane Chapman and Elena Simperl and Laura Koesten
                 and George Konstantinidis and Luis-Daniel
                 Ib{\'a}{\~n}ez and Emilia Kacprzak and Paul Groth",
  title =        "Dataset search: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "251--272",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00564-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00564-x;
                 http://link.springer.com/content/pdf/10.1007/s00778-019-00564-x.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fevgas:2020:IFS,
  author =       "Athanasios Fevgas and Leonidas Akritidis and
                 Panayiotis Bozanis and Yannis Manolopoulos",
  title =        "Indexing in flash storage devices: a survey on
                 challenges, current approaches, and future trends",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "273--311",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00559-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00559-8",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Giatrakos:2020:CER,
  author =       "Nikos Giatrakos and Elias Alevizos and Alexander
                 Artikis and Antonios Deligiannakis and Minos
                 Garofalakis",
  title =        "Complex event recognition in the {Big Data} era: a
                 survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "313--352",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00557-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00557-w",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fang:2020:SCS,
  author =       "Yixiang Fang and Xin Huang and Lu Qin and Ying Zhang
                 and Wenjie Zhang and Reynold Cheng and Xuemin Lin",
  title =        "A survey of community search over big graphs",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "353--392",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00556-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See correction \cite{Fang:2020:CSC}.",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00556-x",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Luo:2020:LBS,
  author =       "Chen Luo and Michael J. Carey",
  title =        "{LSM}-based storage techniques: a survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "393--418",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00555-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00555-y",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Blumenthal:2020:CHG,
  author =       "David B. Blumenthal and Nicolas Boria and Johann
                 Gamper and S{\'e}bastien Bougleux and Luc Brun",
  title =        "Comparing heuristics for graph edit distance
                 computation",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "419--458",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00544-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00544-1",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2020:EMM,
  author =       "Xinhong Chen and Qing Li",
  title =        "Event modeling and mining: a long journey toward
                 explainable events",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "459--482",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00545-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00545-0",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Boncz:2020:SIB,
  author =       "Peter Boncz and Kenneth Salem",
  title =        "Special issue on best papers of {VLDB 2017}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "483--484",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00600-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00600-w;
                 http://link.springer.com/content/pdf/10.1007/s00778-019-00600-w.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Deutch:2020:ENL,
  author =       "Daniel Deutch and Nave Frost and Amir Gilad",
  title =        "Explaining Natural Language query results",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "485--508",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00584-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00584-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Huang:2020:VOD,
  author =       "Silu Huang and Liqi Xu and Jialin Liu and Aaron J.
                 Elmore and Aditya Parameswaran",
  title =        "{$ \varvec  {{\sc Orpheus}} $DB}: bolt-on
                 versioning for relational databases (extended
                 version)",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "509--538",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00594-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00594-5",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Orr:2020:EPA,
  author =       "Laurel Orr and Magdalena Balazinska and Dan Suciu",
  title =        "{EntropyDB}: a probabilistic approach to approximate
                 query processing",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "1",
  pages =        "539--567",
  month =        jan,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00582-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00582-9",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Amer-Yahia:2020:VSE,
  author =       "Sihem Amer-Yahia and Jian Pei",
  title =        "{VLDB SI 2018} editorial",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "593--594",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00599-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00599-0;
                 http://link.springer.com/content/pdf/10.1007/s00778-019-00599-0.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sahu:2020:ULG,
  author =       "Siddhartha Sahu and Amine Mhedhbi and Semih Salihoglu
                 and Jimmy Lin and M. Tamer {\"O}zsu",
  title =        "The ubiquity of large graphs and surprising challenges
                 of graph processing: extended survey",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "595--618",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00548-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00548-x",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Idris:2020:GDY,
  author =       "Muhammad Idris and Mart{\'{\i}}n Ugarte and Stijn
                 Vansummeren and Hannes Voigt and Wolfgang Lehner",
  title =        "General dynamic {Yannakakis}: conjunctive queries with
                 theta joins under updates",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "619--653",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00590-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00590-9",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bonifati:2020:ASL,
  author =       "Angela Bonifati and Wim Martens and Thomas Timm",
  title =        "An analytical study of large {SPARQL} query logs",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "655--679",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00558-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00558-9",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Asudeh:2020:SAS,
  author =       "Abolfazl Asudeh and Jees Augustine and Azade Nazi and
                 Saravanan Thirumuruganathan and Nan Zhang and Gautam
                 Das and Divesh Srivastava",
  title =        "Scalable algorithms for signal reconstruction by
                 leveraging similarity joins",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "681--707",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00562-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00562-z",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ratner:2020:SRT,
  author =       "Alexander Ratner and Stephen H. Bach and Henry
                 Ehrenberg and Jason Fries and Sen Wu and Christopher
                 R{\'e}",
  title =        "{Snorkel}: rapid training data creation with weak
                 supervision",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "709--730",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00552-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00552-1;
                 http://link.springer.com/content/pdf/10.1007/s00778-019-00552-1.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Breslow:2020:MFF,
  author =       "Alex D. Breslow and Nuwan S. Jayasena",
  title =        "{Morton} filters: fast, compressed sparse cuckoo
                 filters",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "731--754",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00561-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00561-0",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Salem:2020:SIB,
  author =       "Kenneth Salem",
  title =        "Special issue on best papers of {DaMoN 2018}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "755--755",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00597-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00597-2;
                 http://link.springer.com/content/pdf/10.1007/s00778-019-00597-2.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lang:2020:MMY,
  author =       "Harald Lang and Linnea Passing and Andreas Kipf and
                 Peter Boncz and Thomas Neumann and Alfons Kemper",
  title =        "Make the most out of your {SIMD} investments: counter
                 control flow divergence in compiled query pipelines",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "757--774",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00547-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00547-y;
                 http://link.springer.com/content/pdf/10.1007/s00778-019-00547-y.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zarubin:2020:ECN,
  author =       "Mikhail Zarubin and Thomas Kissinger and Dirk Habich
                 and Thomas Willhalm and Wolfgang Lehner",
  title =        "Efficient compute node-local replication mechanisms
                 for {NVRAM}-centric data structures",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "775--795",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00549-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00549-w",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Pohl:2020:JHB,
  author =       "Constantin Pohl and Kai-Uwe Sattler and Goetz Graefe",
  title =        "Joins on high-bandwidth memory: a new level in the
                 memory hierarchy",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "2--3",
  pages =        "797--817",
  month =        may,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00546-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Mar 19 17:10:22 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "http://link.springer.com/article/10.1007/s00778-019-00546-z",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Pedersen:2020:FSR,
  author =       "Simon Aagaard Pedersen and Bin Yang and Christian S.
                 Jensen",
  title =        "Fast stochastic routing under time-varying
                 uncertainty",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "819--839",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00585-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00585-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 31 October 2019 Pages: 819 - 839",
}

@Article{Xu:2020:EPM,
  author =       "Hongfei Xu and Yu Gu and Rui Zhang",
  title =        "Efficient processing of moving collective spatial
                 keyword queries",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "841--865",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00583-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00583-8",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 01 November 2019 Pages: 841 - 865",
}

@Article{Geerts:2020:CDL,
  author =       "Floris Geerts and Giansalvatore Mecca and Donatello
                 Santoro",
  title =        "Cleaning data with {Llunatic}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "867--892",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00586-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00586-5",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 08 November 2019 Pages: 867 - 892",
}

@Article{Wu:2020:TRS,
  author =       "Dingming Wu and Hao Zhou and Nikos Mamoulis",
  title =        "Top-$k$ relevant semantic place retrieval on
                 spatiotemporal {RDF} data",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "893--917",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00591-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00591-8",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 19 November 2019 Pages: 893 - 917",
}

@Article{Qin:2020:EQA,
  author =       "Jianbin Qin and Chuan Xiao and Kunihiko Sadakane",
  title =        "Efficient query autocompletion with edit
                 distance-based error tolerance",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "4",
  pages =        "919--943",
  month =        jul,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00595-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:39 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00595-4",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 14 December 2019 Pages: 919 - 943",
}

@Article{Jiang:2020:SCS,
  author =       "Jiawei Jiang and Fangcheng Fu and Bin Cui",
  title =        "{SKCompress}: compressing sparse and nonuniform
                 gradient in distributed machine learning",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "945--972",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00596-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00596-3",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 01 January 2020 Pages: 945 - 972",
}

@Article{Wang:2020:FEF,
  author =       "Chaohui Wang and Miao Xie and Shuigeng Zhou",
  title =        "{FERRARI}: an efficient framework for visual
                 exploratory subgraph search in graph databases",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "973--998",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00601-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00601-0",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 30 January 2020 Pages: 973 - 998",
}

@Article{Chang:2020:EMC,
  author =       "Lijun Chang",
  title =        "Efficient maximum clique computation and enumeration
                 over large sparse graphs",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "999--1022",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00602-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00602-z",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 15 February 2020 Pages: 999 - 1022",
}

@Article{Chondrogiannis:2020:FSP,
  author =       "Theodoros Chondrogiannis and Panagiotis Bouros and
                 David B. Blumenthal",
  title =        "Finding $k$-shortest paths with limited overlap",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1023--1047",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00604-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00604-x",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 21 February 2020 Pages: 1023 - 1047",
}

@Article{Zou:2020:ADS,
  author =       "Jia Zou and Arun Iyengar and Chris Jermaine",
  title =        "Architecture of a distributed storage that combines
                 file system, memory and computation in a single layer",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1049--1073",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00605-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00605-w",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 26 February 2020 Pages: 1049 - 1073",
}

@Article{Liu:2020:ECC,
  author =       "Boge Liu and Long Yuan and Jingren Zhou",
  title =        "Efficient $ (\alpha, \beta)$-core computation in
                 bipartite graphs",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1075--1099",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00606-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00606-9",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 04 March 2020 Pages: 1075 - 1099",
}

@Article{Chen:2020:TTP,
  author =       "Lisi Chen and Shuo Shang and Ling Shao",
  title =        "Top-$k$ term publish/subscribe for geo-textual data
                 streams",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1101--1128",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00607-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00607-8",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 09 March 2020 Pages: 1101 - 1128",
}

@Article{Yang:2020:TFA,
  author =       "Fan Yang and Faisal M. Almutairi and Vladimir
                 Zadorozhny",
  title =        "{TurboLift}: fast accuracy lifting for historical data
                 recovery",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1129--1148",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00609-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See correction \cite{Yang:2024:CTF}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00609-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 09 March 2020 Pages: 1129 - 1148",
}

@Article{Guo:2020:CAP,
  author =       "Chenjuan Guo and Bin Yang and Lu Chen",
  title =        "Context-aware, preference-based vehicle routing",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1149--1170",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00608-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00608-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 11 March 2020 Pages: 1149 - 1170",
}

@Article{Cai:2020:DSK,
  author =       "Zhi Cai and Georgios Kalamatianos and Dimitris
                 Papadias",
  title =        "Diversified spatial keyword search on {RDF} data",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1171--1189",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00610-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00610-z",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 12 March 2020 Pages: 1171 - 1189",
}

@Article{Goasdoue:2020:RGS,
  author =       "Fran{\c{c}}ois Goasdou{\'e} and Pawe{\l} Guzewicz and
                 Ioana Manolescu",
  title =        "{RDF} graph summarization for first-sight structure
                 discovery",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1191--1218",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00611-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00611-y",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 30 April 2020 Pages: 1191 - 1218",
}

@Article{Fang:2020:CSC,
  author =       "Yixiang Fang and Xin Huang and Xuemin Lin",
  title =        "Correction: {A survey of community search over big
                 graphs}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "5",
  pages =        "1219--1219",
  month =        sep,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-019-00592-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Fang:2020:SCS}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-019-00592-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 11 November 2019 Pages: 1219 - 1219",
}

@Article{Aboulnaga:2020:SIB,
  author =       "Ashraf Aboulnaga",
  title =        "Special issue on the best papers of {DaMoN 2019}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1221--1221",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00629-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00629-2",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 05 September 2020 Pages: 1221 - 1221",
}

@Article{vanRenen:2020:BBP,
  author =       "Alexander van Renen and Lukas Vogel and Alfons
                 Kemper",
  title =        "Building blocks for persistent memory",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1223--1241",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00622-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00622-9",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 23 September 2020 Pages: 1223 - 1241",
}

@Article{Polychroniou:2020:VSV,
  author =       "Orestis Polychroniou and Kenneth A. Ross",
  title =        "{VIP}: A {SIMD} vectorized analytical query engine",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1243--1261",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00621-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00621-w",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 13 July 2020 Pages: 1243 - 1261",
}

@Article{Lasch:2020:FSS,
  author =       "Robert Lasch and Ismail Oukid and Kai-Uwe Sattler",
  title =        "Faster \& strong: string dictionary compression using
                 sampling and fast vectorized decompression",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1263--1285",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00620-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00620-x",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 20 July 2020 Pages: 1263 - 1285",
}

@Article{Kruse:2020:RDJ,
  author =       "Sebastian Kruse and Zoi Kaoudi and Jorge-Arnulfo
                 Quian{\'e}-Ruiz",
  title =        "{RHEEMix} in the data jungle: a cost-based optimizer
                 for cross-platform systems",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1287--1310",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00612-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00612-x",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 18 May 2020 Pages: 1287 - 1310",
}

@Article{Yang:2020:GBF,
  author =       "Jingru Yang and Ju Fan and Xiaoyong Du",
  title =        "A game-based framework for crowdsourced data
                 labeling",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1311--1336",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00613-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00613-w",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 19 May 2020 Pages: 1311 - 1336",
}

@Article{Jacobs:2020:BBB,
  author =       "Steven Jacobs and Xikui Wang and Md Yusuf Sarwar
                 Uddin",
  title =        "{BAD} to the bone: {Big Active Data} at its core",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1337--1364",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00616-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00616-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 23 May 2020 Pages: 1337 - 1364",
}

@Article{Sun:2020:TSI,
  author =       "Tao Sun and Hongbo Liu and Xindong Wu",
  title =        "Time series indexing by dynamic covering with
                 cross-range constraints",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1365--1384",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00614-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00614-9",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 28 May 2020 Pages: 1365 - 1384",
}

@Article{Huang:2020:EAA,
  author =       "Keke Huang and Jing Tang and Andrew Lim",
  title =        "Efficient approximation algorithms for adaptive
                 influence maximization",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1385--1406",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00615-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00615-8",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 01 June 2020 Pages: 1385 - 1406",
}

@Article{Li:2020:FSC,
  author =       "Rong-Hua Li and Lu Qin and Zibin Zheng",
  title =        "Finding skyline communities in multi-valued networks",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1407--1432",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00618-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00618-5",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 08 June 2020 Pages: 1407 - 1432",
}

@Article{Ahmad:2020:AWM,
  author =       "Hiba Abu Ahmad and Hongzhi Wang",
  title =        "Automatic weighted matching rectifying rule discovery
                 for data repairing",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1433--1447",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00617-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00617-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 09 June 2020 Pages: 1433 - 1447",
}

@Article{Linardi:2020:SDS,
  author =       "Michele Linardi and Themis Palpanas",
  title =        "Scalable data series subsequence matching with
                 {ULISSE}",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1449--1474",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00619-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00619-4",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 04 July 2020 Pages: 1449 - 1474",
}

@Article{Song:2020:IPA,
  author =       "Liangjun Song and Junhao Gan and Timos Sellis",
  title =        "Incremental preference adjustment: a graph-theoretical
                 approach",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1475--1500",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00623-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00623-8",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 03 August 2020 Pages: 1475 - 1500",
}

@Article{Lee:2020:TLA,
  author =       "Dongjin Lee and Kijung Shin and Christos Faloutsos",
  title =        "Temporal locality-aware sampling for accurate triangle
                 counting in real graph streams",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1501--1525",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00624-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00624-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 12 August 2020 Pages: 1501 - 1525",
}

@Article{Omidvar-Tehrani:2020:CAE,
  author =       "Behrooz Omidvar-Tehrani and Sihem Amer-Yahia and Laks
                 V. S. Lakshmanan",
  title =        "Cohort analytics: efficiency and applicability",
  journal =      j-VLDB-J,
  volume =       "29",
  number =       "6",
  pages =        "1527--1550",
  month =        nov,
  year =         "2020",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00625-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:40 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00625-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 27 August 2020 Pages: 1527 - 1550",
}

@Article{Ozcan:2021:GES,
  author =       "Fatma {\"O}zcan and Lei Chen",
  title =        "Guest Editorial: Special issue on {VLDB 2019}",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "1",
  pages =        "1--2",
  month =        jan,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00630-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00630-9",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 11 September 2020 Pages: 1 - 2",
}

@Article{Ruan:2021:LFG,
  author =       "Pingcheng Ruan and Tien Tuan Anh Dinh and Beng Chin
                 Ooi",
  title =        "{LineageChain}: a fine-grained, secure and efficient
                 data provenance system for blockchains",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "1",
  pages =        "3--24",
  month =        jan,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00646-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00646-1",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 01 January 2021 Pages: 3 - 24",
}

@Article{Wu:2021:ATC,
  author =       "Chenggang Wu and Vikram Sreekanti and Joseph M.
                 Hellerstein",
  title =        "Autoscaling tiered cloud storage in {Anna}",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "1",
  pages =        "25--43",
  month =        jan,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00632-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00632-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 09 September 2020 Pages: 25 - 43",
}

@Article{Abuzaid:2021:DRI,
  author =       "Firas Abuzaid and Peter Kraft and Matei Zaharia",
  title =        "{DIFF}: a relational interface for large-scale data
                 explanation",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "1",
  pages =        "45--70",
  month =        jan,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00633-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00633-6",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 30 September 2020 Pages: 45 - 70",
}

@Article{Whittaker:2021:ICC,
  author =       "Michael Whittaker and Joseph M. Hellerstein",
  title =        "Interactive checks for coordination avoidance",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "1",
  pages =        "71--92",
  month =        jan,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00628-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00628-3",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 05 September 2020 Pages: 71 - 92",
}

@Article{Fan:2021:GBV,
  author =       "Hua Fan and Wojciech Golab",
  title =        "Gossip-based visibility control for high-performance
                 geo-distributed transactions",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "1",
  pages =        "93--114",
  month =        jan,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00626-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00626-5",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 21 September 2020 Pages: 93 - 114",
}

@Article{Li:2021:QSD,
  author =       "Yuliang Li and Aaron Feng and Wang-Chiew Tan",
  title =        "Querying subjective data",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "1",
  pages =        "115--140",
  month =        jan,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00634-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00634-5",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 08 September 2020 Pages: 115 - 140",
}

@Article{Dong:2021:CTS,
  author =       "Yuyang Dong and Chuan Xiao and Hiroyuki Kitagawa",
  title =        "Continuous top-$k$ spatial-keyword search on dynamic
                 objects",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "2",
  pages =        "141--161",
  month =        mar,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00627-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00627-4",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 05 September 2020 Pages: 141 - 161",
}

@Article{Zhang:2021:TTA,
  author =       "Feng Zhang and Jidong Zhai and Xiaoyong Du",
  title =        "{TADOC}: Text analytics directly on compression",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "2",
  pages =        "163--188",
  month =        mar,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00636-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00636-3",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 19 September 2020 Pages: 163 - 188",
}

@Article{Li:2021:CTQ,
  author =       "Yan Li and Hao Wang and Zhiguo Gong",
  title =        "Crowdsourced top-$k$ queries by pairwise preference
                 judgments with confidence and budget control",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "2",
  pages =        "189--213",
  month =        mar,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00631-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00631-8",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 21 September 2020 Pages: 189 - 213",
}

@Article{Liu:2021:LET,
  author =       "Wanqi Liu and Hanchen Wang and Xuemin Lin",
  title =        "{EI-LSH}: An early-termination driven {I/O} efficient
                 incremental $c$-approximate nearest neighbor search",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "2",
  pages =        "215--235",
  month =        mar,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00635-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00635-4",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 30 September 2020 Pages: 215 - 235",
}

@Article{Yu:2021:GCC,
  author =       "Jia Yu and Mohamed Sarwat",
  title =        "{GeoSparkViz}: a cluster computing system for
                 visualizing massive-scale geospatial data",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "2",
  pages =        "237--258",
  month =        mar,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00645-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00645-2",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 07 January 2021 Pages: 237 - 258",
}

@Article{Zhang:2021:SAN,
  author =       "Yongqi Zhang and Quanming Yao and Lei Chen",
  title =        "Simple and automated negative sampling for knowledge
                 graph embedding",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "2",
  pages =        "259--285",
  month =        mar,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00640-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00640-7",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 28 January 2021 Pages: 259 - 285",
}

@Article{Fang:2021:DHE,
  author =       "Ziquan Fang and Lu Chen and Christian S. Jensen",
  title =        "{Dragoon}: a hybrid and efficient big trajectory
                 management system for offline and online analytics",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "2",
  pages =        "287--310",
  month =        mar,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00652-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu May 13 17:41:41 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00652-x",
  acknowledgement = ack-nhfb,
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
  online-date =  "Published: 03 February 2021 Pages: 287 - 310",
}

@Article{Paul:2021:SER,
  author =       "Debjyoti Paul and Feifei Li and Jeff M. Phillips",
  title =        "Semantic embedding for regions of interest",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "3",
  pages =        "311--331",
  month =        may,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00647-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 9 10:33:58 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00647-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Romanous:2021:ELL,
  author =       "Bashar Romanous and Skyler Windh and Vassilis
                 Tsotras",
  title =        "Efficient local locking for massively multithreaded
                 in-memory hash-based operators",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "3",
  pages =        "333--359",
  month =        may,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00642-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 9 10:33:58 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00642-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mao:2021:CES,
  author =       "Qizhong Mao and Steven Jacobs and Neal E. Young",
  title =        "Comparison and evaluation of state-of-the-art {LSM}
                 merge policies",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "3",
  pages =        "361--378",
  month =        may,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00638-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 9 10:33:58 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00638-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Piatov:2021:CES,
  author =       "Danila Piatov and Sven Helmer and Fabio Persia",
  title =        "Cache-efficient sweeping-based interval joins for
                 extended {Allen} relation predicates",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "3",
  pages =        "379--402",
  month =        may,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00650-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 9 10:33:58 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00650-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Do:2021:BDC,
  author =       "Jaeyoung Do and Ivan Luiz Picoli and Philippe Bonnet",
  title =        "Better database cost\slash performance via batched
                 {I/O} on programmable {SSD}",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "3",
  pages =        "403--424",
  month =        may,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00648-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 9 10:33:58 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00648-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Song:2021:CTT,
  author =       "Shaoxu Song and Ruihong Huang and Jianmin Wang",
  title =        "Cleaning timestamps with temporal constraints",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "3",
  pages =        "425--446",
  month =        may,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00641-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 9 10:33:58 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00641-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2021:IEM,
  author =       "Chengcheng Yang and Dong Deng and Ling Shao",
  title =        "Internal and external memory set containment join",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "3",
  pages =        "447--470",
  month =        may,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00644-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 9 10:33:58 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See correction \cite{Yang:2021:CIE}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00644-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2021:ESN,
  author =       "Xiaoshuang Chen and Longbin Lai and Xuemin Lin",
  title =        "Efficient structural node similarity computation on
                 billion-scale graphs",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "3",
  pages =        "471--493",
  month =        may,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00654-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 9 10:33:58 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00654-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yu:2021:VAR,
  author =       "Wenhui Yu and Xiangnan He and Zheng Qin",
  title =        "Visually aware recommendation with aesthetic
                 features",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "4",
  pages =        "495--513",
  month =        jul,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00651-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00651-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hao:2021:MCE,
  author =       "Shuang Hao and Nan Tang and Ning Wang",
  title =        "Mis-categorized entities detection",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "4",
  pages =        "515--536",
  month =        jul,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00653-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00653-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Galhotra:2021:EEP,
  author =       "Sainyam Galhotra and Donatella Firmani and Divesh
                 Srivastava",
  title =        "Efficient and effective {ER} with progressive
                 blocking",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "4",
  pages =        "537--557",
  month =        jul,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00656-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00656-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hewasinghage:2021:CMR,
  author =       "Moditha Hewasinghage and Alberto Abell{\'o} and
                 Esteban Zim{\'a}nyi",
  title =        "A cost model for random access queries in document
                 stores",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "4",
  pages =        "559--578",
  month =        jul,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00660-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00660-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Schneider:2021:DDS,
  author =       "Johannes Schneider and Phillip Wenig and Thorsten
                 Papenbrock",
  title =        "Distributed detection of sequential anomalies in
                 univariate time series",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "4",
  pages =        "579--602",
  month =        jul,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00657-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00657-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2021:LKB,
  author =       "Zhida Chen and Lisi Chen and Christian S. Jensen",
  title =        "Location- and keyword-based querying of geo-textual
                 data: a survey",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "4",
  pages =        "603--640",
  month =        jul,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00661-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00661-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sirin:2021:MAA,
  author =       "Utku Sirin and Pinar T{\"o}z{\"u}n and Anastasia
                 Ailamaki",
  title =        "Micro-architectural analysis of in-memory {OLTP}:
                 {Revisited}",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "4",
  pages =        "641--665",
  month =        jul,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00663-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00663-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bouros:2021:MIJ,
  author =       "Panagiotis Bouros and Nikos Mamoulis and Manolis
                 Terrovitis",
  title =        "{In-Memory Interval Joins}",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "4",
  pages =        "667--691",
  month =        jul,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00639-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00639-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Guo:2021:MAD,
  author =       "Yunyan Guo and Zhipeng Zhang and Jianzhong Li",
  title =        "Model averaging in distributed machine learning: a
                 case study with {Apache Spark}",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "4",
  pages =        "693--712",
  month =        jul,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00664-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00664-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Jiang:2021:EEK,
  author =       "Yuli Jiang and Xin Huang and Hong Cheng",
  title =        "{I/O} efficient $k$-truss community search in massive
                 graphs",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "5",
  pages =        "713--738",
  month =        sep,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00649-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00649-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Balayn:2021:MBU,
  author =       "Agathe Balayn and Christoph Lofi and Geert-Jan
                 Houben",
  title =        "Managing bias and unfairness in data for decision
                 support: a survey of machine learning and data
                 engineering approaches to identify and mitigate bias
                 and unfairness within data management and analytics
                 systems",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "5",
  pages =        "739--768",
  month =        sep,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00671-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00671-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Shao:2021:MAF,
  author =       "Yingxia Shao and Shiyue Huang and Lei Chen",
  title =        "Memory-aware framework for fast and scalable
                 second-order random walk over billion-edge natural
                 graphs",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "5",
  pages =        "769--797",
  month =        sep,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00669-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00669-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Peng:2021:EHC,
  author =       "You Peng and Xuemin Lin and Jingren Zhou",
  title =        "Efficient {Hop}-constrained $s$--$t$ {Simple Path}
                 {Enumeration}",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "5",
  pages =        "799--823",
  month =        sep,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00674-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00674-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Debrouvier:2021:MQL,
  author =       "Ariel Debrouvier and Eliseo Parodi and Alejandro
                 Vaisman",
  title =        "A model and query language for temporal graph
                 databases",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "5",
  pages =        "825--858",
  month =        sep,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00675-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00675-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2021:FSH,
  author =       "Jin Wang and Jiacheng Wu and Carlo Zaniolo",
  title =        "Formal semantics and high performance in declarative
                 machine learning using {Datalog}",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "5",
  pages =        "859--881",
  month =        sep,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00665-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00665-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kersten:2021:TTF,
  author =       "Timo Kersten and Viktor Leis and Thomas Neumann",
  title =        "{Tidy Tuples} and {Flying Start}: fast compilation
                 and fast execution of relational queries in {Umbra}",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "5",
  pages =        "883--905",
  month =        sep,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-020-00643-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-020-00643-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2021:CIE,
  author =       "Chengcheng Yang and Dong Deng and Ling Shao",
  title =        "Correction to: {Internal} and external memory set
                 containment join",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "5",
  pages =        "907--907",
  month =        sep,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00662-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Yang:2021:IEM}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00662-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Boniol:2021:USS,
  author =       "Paul Boniol and Michele Linardi and Emmanuel Remy",
  title =        "Unsupervised and scalable subsequence anomaly
                 detection in large data series",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "6",
  pages =        "909--931",
  month =        nov,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00655-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See correction \cite{Boniol:2023:CUS}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00655-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Tangwongsan:2021:OSW,
  author =       "Kanat Tangwongsan and Martin Hirzel and Scott
                 Schneider",
  title =        "In-order sliding-window aggregation in worst-case
                 constant time",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "6",
  pages =        "933--957",
  month =        nov,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00668-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00668-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2021:HCE,
  author =       "Ji Zhang and Ke Zhou and Jiashu Xing",
  title =        "{CDBTune}$^+$: an efficient deep reinforcement
                 learning-based automatic cloud database tuning system",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "6",
  pages =        "959--987",
  month =        nov,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00670-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00670-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2021:EBS,
  author =       "Hanzhi Wang and Zhewei Wei and Ji-Rong Wen",
  title =        "{ExactSim}: benchmarking single-source {SimRank}
                 algorithms with high-precision ground truths",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "6",
  pages =        "989--1015",
  month =        nov,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00672-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00672-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Forresi:2021:DBF,
  author =       "Chiara Forresi and Enrico Gallinucci and Hamdi {Ben
                 Hamadou}",
  title =        "A dataspace-based framework for {OLAP} analyses in a
                 high-variety multistore",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "6",
  pages =        "1017--1040",
  month =        nov,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00682-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00682-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Peng:2021:FDS,
  author =       "Botao Peng and Panagiota Fatourou and Themis
                 Palpanas",
  title =        "Fast data series indexing for in-memory data",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "6",
  pages =        "1041--1067",
  month =        nov,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00677-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00677-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wei:2021:ADE,
  author =       "Ziheng Wei and Sven Hartmann and Sebastian Link",
  title =        "Algorithms for the discovery of embedded functional
                 dependencies",
  journal =      j-VLDB-J,
  volume =       "30",
  number =       "6",
  pages =        "1069--1093",
  month =        nov,
  year =         "2021",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00684-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Apr 14 14:19:08 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00684-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kossmann:2022:DDQ,
  author =       "Jan Kossmann and Thorsten Papenbrock and Felix
                 Naumann",
  title =        "Data dependencies for query optimization: a survey",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "1",
  pages =        "1--22",
  month =        jan,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00676-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 15 07:02:55 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See correction \cite{Kossmann:2023:CDD}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00676-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhu:2022:PSA,
  author =       "Yifan Zhu and Lu Chen and Christian S. Jensen",
  title =        "Pivot selection algorithms in metric spaces: a survey
                 and experimental study",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "1",
  pages =        "23--47",
  month =        jan,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00691-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 15 07:02:55 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00691-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Schmidl:2022:EDD,
  author =       "Sebastian Schmidl and Thorsten Papenbrock",
  title =        "Efficient distributed discovery of bidirectional order
                 dependencies",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "1",
  pages =        "49--74",
  month =        jan,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00683-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 15 07:02:55 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00683-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Dignos:2022:LRJ,
  author =       "Anton Dign{\"o}s and Michael H. B{\"o}hlen and Peter
                 Moser",
  title =        "Leveraging range joins for the computation of overlap
                 joins",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "1",
  pages =        "75--99",
  month =        jan,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00692-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 15 07:02:55 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00692-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Peng:2022:ARE,
  author =       "You Peng and Xuemin Lin and Lu Qin",
  title =        "Answering reachability and {$K$}-reach queries on
                 large graphs with label constraints",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "1",
  pages =        "101--127",
  month =        jan,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00695-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 15 07:02:55 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00695-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2022:DLP,
  author =       "Wentao Li and Miao Qiao and Xuemin Lin",
  title =        "Distance labeling: on parallelism, compression, and
                 ordering",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "1",
  pages =        "129--155",
  month =        jan,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00694-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 15 07:02:55 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00694-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Alevizos:2022:CEF,
  author =       "Elias Alevizos and Alexander Artikis and Georgios
                 Paliouras",
  title =        "Complex event forecasting with prediction suffix
                 trees",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "1",
  pages =        "157--180",
  month =        jan,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00698-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 15 07:02:55 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00698-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Snodgrass:2022:QOH,
  author =       "Richard T. Snodgrass and Sabah Currim and Young-Kyoon
                 Suh",
  title =        "Have query optimizers hit the wall?",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "1",
  pages =        "181--200",
  month =        jan,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00689-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 15 07:02:55 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00689-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bonifati:2022:SIB,
  author =       "Angela Bonifati and Hannes Voigt",
  title =        "Special issue on big graph data management and
                 processing",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "2",
  pages =        "201--202",
  month =        mar,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00732-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 16 07:47:28 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00732-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2022:TES,
  author =       "Kai Wang and Xuemin Lin and Ying Zhang",
  title =        "Towards efficient solutions of bitruss decomposition
                 for large-scale bipartite graphs",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "2",
  pages =        "203--226",
  month =        mar,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00658-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 16 07:47:28 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00658-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Linghu:2022:ACE,
  author =       "Qingyuan Linghu and Fan Zhang and Ying Zhang",
  title =        "Anchored coreness: efficient reinforcement of social
                 networks",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "2",
  pages =        "227--252",
  month =        mar,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00673-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 16 07:47:28 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00673-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yan:2022:PPF,
  author =       "Da Yan and Wenwen Qu and Yang Zhou",
  title =        "{PrefixFPM}: a parallel framework for general-purpose
                 mining of frequent and closed patterns",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "2",
  pages =        "253--286",
  month =        mar,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00687-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 16 07:47:28 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00687-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yan:2022:GTG,
  author =       "Da Yan and Guimu Guo and John C. S. Lui",
  title =        "{G-thinker}: a general distributed framework for
                 finding qualified subgraphs in a big graph with load
                 balancing",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "2",
  pages =        "287--320",
  month =        mar,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00688-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 16 07:47:28 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00688-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mohamed:2022:RKG,
  author =       "Aisha Mohamed and Ghadeer Abuoda and Ashraf
                 Aboulnaga",
  title =        "{RDFFrames}: knowledge graph access for machine
                 learning tools",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "2",
  pages =        "321--346",
  month =        mar,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00690-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 16 07:47:28 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00690-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sagi:2022:DSR,
  author =       "Tomer Sagi and Matteo Lissandrini and Katja Hose",
  title =        "A design space for {RDF} data representations",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "2",
  pages =        "347--373",
  month =        mar,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00725-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 16 07:47:28 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00725-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Rost:2022:DTG,
  author =       "Christopher Rost and Kevin Gomez and Erhard Rahm",
  title =        "Distributed temporal graph analytics with {GRADOOP}",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "2",
  pages =        "375--401",
  month =        mar,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00667-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 16 07:47:28 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00667-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bevilacqua:2022:FME,
  author =       "Glenn S. Bevilacqua and Laks V. S. Lakshmanan",
  title =        "A fractional memory-efficient approach for online
                 continuous-time influence maximization",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "2",
  pages =        "403--429",
  month =        mar,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00679-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Apr 16 07:47:28 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00679-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ali:2022:SRS,
  author =       "Waqas Ali and Muhammad Saleem and Axel-Cyrille Ngonga
                 Ngomo",
  title =        "A survey of {RDF} stores \& {SPARQL} engines for
                 querying knowledge graphs",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "3",
  pages =        "1--26",
  month =        may,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00711-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 6 07:32:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00711-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Pitoura:2022:FRR,
  author =       "Evaggelia Pitoura and Kostas Stefanidis and Georgia
                 Koutrika",
  title =        "Fairness in rankings and recommendations: an
                 overview",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "3",
  pages =        "431--458",
  month =        may,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00697-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 6 07:32:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00697-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hidayat:2022:CMM,
  author =       "Arif Hidayat and Muhammad Aamir Cheema and Ying
                 Zhang",
  title =        "Continuous monitoring of moving skyline and top-$k$
                 queries",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "3",
  pages =        "459--482",
  month =        may,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00702-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 6 07:32:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00702-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Farhan:2022:FFD,
  author =       "Muhammad Farhan and Qing Wang and Brendan McKay",
  title =        "Fast fully dynamic labelling for distance queries",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "3",
  pages =        "483--506",
  month =        may,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00707-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 6 07:32:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00707-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhao:2022:RCS,
  author =       "Tianyu Zhao and Shuai Huang and Guoliang Li",
  title =        "{RNE}: computing shortest paths using road network
                 embedding",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "3",
  pages =        "507--528",
  month =        may,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00705-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 6 07:32:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00705-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lai:2022:AMW,
  author =       "Zhuohang Lai and Xibo Sun and Xiaolong Xie",
  title =        "Accelerating multi-way joins on the {GPU}",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "3",
  pages =        "529--553",
  month =        may,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00708-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 6 07:32:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00708-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{diVimercati:2022:AMQ,
  author =       "Sabrina {De Capitani di Vimercati} and Sara Foresti
                 and Pierangela Samarati",
  title =        "An authorization model for query execution in the
                 cloud",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "3",
  pages =        "555--579",
  month =        may,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00709-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 6 07:32:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00709-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Huang:2022:PEG,
  author =       "Kai Huang and Haibo Hu and Xiaofang Zhou",
  title =        "Privacy and efficiency guaranteed social subgraph
                 matching",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "3",
  pages =        "581--602",
  month =        may,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00706-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri May 6 07:32:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00706-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wen:2022:SRQ,
  author =       "Dong Wen and Bohua Yang and Wenjie Zhang",
  title =        "Span-reachability querying in large temporal graphs",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "629--647",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00715-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00715-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Khalil:2022:PML,
  author =       "Jalal Khalil and Da Yan and Lyuheng Yuan",
  title =        "Parallel mining of large maximal quasi-cliques",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "649--674",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00712-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00712-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kellou-Menouer:2022:SSS,
  author =       "Kenza Kellou-Menouer and Nikolaos Kardoulakis and
                 Haridimos Kondylakis",
  title =        "A survey on semantic schema discovery",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "675--710",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00717-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00717-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fritz:2022:EEC,
  author =       "Manuel Fritz and Michael Behringer and Holger
                 Schwarz",
  title =        "Efficient exploratory clustering analyses in
                 large-scale exploration processes",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "711--732",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00716-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00716-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zheng:2022:PPW,
  author =       "Libin Zheng and Lei Chen and Peng Cheng",
  title =        "Privacy-preserving worker allocation in
                 crowdsourcing",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "733--751",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00713-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00713-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Qin:2022:IDR,
  author =       "Xuedi Qin and Chengliang Chai and Mourad Ouzzani",
  title =        "Interactively discovering and ranking desired tuples
                 by data exploration",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "753--777",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00714-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00714-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhu:2022:OPP,
  author =       "Yuqing Zhu and Jing Tang and Xueyan Tang",
  title =        "Optimal price profile for influential nodes in online
                 social networks",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "779--795",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00727-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00727-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Amagata:2022:FEP,
  author =       "Daichi Amagata and Makoto Onizuka and Takahiro Hara",
  title =        "Fast, exact, and parallel-friendly outlier detection
                 algorithms with proximity graph in metric spaces",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "4",
  pages =        "797--821",
  month =        jul,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00729-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Jun 25 16:46:59 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00729-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Huang:2022:SIR,
  author =       "Zi Huang and Yanyan Shen and Divesh Srivastava",
  title =        "Special issue on responsible data management and data
                 science",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "823--823",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00761-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00761-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2022:AOD,
  author =       "Pei Li and Jaroslaw Szlichta and Divesh Srivastava",
  title =        "{ABC} of order dependencies",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "825--849",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00696-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00696-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Principe:2022:AHS,
  author =       "Renzo Arturo Alva Principe and Andrea Maurino and
                 Blerina Spahiu",
  title =        "{ABSTAT-HD}: a scalable tool for profiling very large
                 knowledge graphs",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "851--876",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00704-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00704-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2022:FAP,
  author =       "Qinyong Wang and Hongzhi Yin and Xiangliang Zhang",
  title =        "Fast-adapting and privacy-preserving federated
                 recommender system",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "877--896",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00700-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00700-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Xiang:2022:GGG,
  author =       "Sheng Xiang and Dong Wen and Xuemin Lin",
  title =        "General graph generators: experiments, analyses, and
                 improvements",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "897--925",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00701-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00701-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Liu:2022:PGA,
  author =       "Zifan Liu and Zhechun Zhou and Theodoros Rekatsinas",
  title =        "Picket: guarding against corrupted data in tabular
                 data during learning and inference",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "927--955",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00699-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00699-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ge:2022:MMD,
  author =       "Yong-Feng Ge and Maria Orlowska and Yanchun Zhang",
  title =        "{MDDE}: multitasking distributed differential
                 evolution for privacy-preserving database
                 fragmentation",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "957--975",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00718-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00718-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Panjei:2022:SOE,
  author =       "Egawati Panjei and Le Gruenwald and Shejuti Silvia",
  title =        "A survey on outlier explanations",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "977--1008",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00721-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00721-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zeng:2022:EAS,
  author =       "Weixin Zeng and Xiang Zhao and Wei Wang",
  title =        "On entity alignment at scale",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1009--1033",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00703-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00703-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Xu:2022:PBP,
  author =       "Qingyu Xu and Feng Zhang and Xiaoyong Du",
  title =        "Payment behavior prediction on shared parking lots
                 with {TR-GCN}",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1035--1058",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00722-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00722-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Sadiq:2022:IRN,
  author =       "Shazia Sadiq and Amir Aryani and Xiaofang Zhou",
  title =        "Information Resilience: the nexus of responsible and
                 agile approaches to information use",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1059--1084",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00720-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00720-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Liu:2022:ECR,
  author =       "Fanzhen Liu and Zhao Li and Quan Z. Sheng",
  title =        "{eRiskCom}: an e-commerce risky community detection
                 platform",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1085--1101",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00723-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00723-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Grafberger:2022:DDD,
  author =       "Stefan Grafberger and Paul Groth and Sebastian
                 Schelter",
  title =        "Data distribution debugging in machine learning
                 pipelines",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1103--1126",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00726-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00726-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2022:DTA,
  author =       "Qian Li and Zhichao Wang and Guandong Xu",
  title =        "Deep treatment-adaptive network for causal inference",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1127--1142",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00724-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00724-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2022:BCS,
  author =       "Rui Zhang and Bayu Distiawan Trisedya and Jianzhong
                 Qi",
  title =        "A benchmark and comprehensive survey on knowledge
                 graph entity alignment via representation learning",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "5",
  pages =        "1143--1168",
  month =        sep,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00747-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Mon Aug 29 11:34:10 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00747-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Porobic:2022:SIB,
  author =       "Danica Porobic",
  title =        "Special issue on the best papers of {DaMoN 2020}",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1169--1169",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00766-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00766-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Funke:2022:LLQ,
  author =       "Henning Funke and Jan M{\"u}hlig and Jens Teubner",
  title =        "Low-latency query compilation",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1171--1184",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00741-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00741-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bang:2022:FSC,
  author =       "Tiemo Bang and Norman May and Ilia Petrov and Carsten
                 Binnig",
  title =        "The full story of 1000 cores",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1185--1213",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00742-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00742-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Pietrzyk:2022:SSV,
  author =       "Johannes Pietrzyk and Alexander Krause and Dirk Habich
                 and Wolfgang Lehner",
  title =        "To share or not to share vector registers?",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1215--1236",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00744-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00744-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Balazinska:2022:EV,
  author =       "Magdalena Balazinska and Xiaofang Zhou",
  title =        "Editorial for {S.I.}: {VLDB 2020}",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1237--1238",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00734-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00734-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Huang:2022:OOC,
  author =       "Yihe Huang and William Qian and Eddie Kohler and
                 Barbara Liskov and Liuba Shrira",
  title =        "Opportunities for optimism in contended main-memory
                 multicore transactions",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1239--1261",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00719-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00719-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kandula:2022:DIP,
  author =       "Srikanth Kandula and Laurel Orr and Surajit
                 Chaudhuri",
  title =        "Data-induced predicates for sideways information
                 passing in query optimizers",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1263--1290",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00693-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00693-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Herlihy:2022:CCD,
  author =       "Maurice Herlihy and Barbara Liskov and Liuba Shrira",
  title =        "Cross-chain deals and adversarial commerce",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1291--1309",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00686-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00686-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2022:AAC,
  author =       "Yuanbing Li and Xian Wu and Yifei Jin and Jian Li and
                 Guoliang Li and Jianhua Feng",
  title =        "Adapative algorithms for crowd-aided categorization",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1311--1337",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00685-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00685-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zheng:2022:PLF,
  author =       "Bolong Zheng and Xi Zhao and Lianggui Weng and Quoc
                 Viet Hung Nguyen and Hang Liu and Christian S. Jensen",
  title =        "{PM-LSH}: a fast and accurate in-memory framework for
                 high-dimensional approximate {NN} and closest pair
                 search",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1339--1363",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00680-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00680-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lyu:2022:MTE,
  author =       "Bingqing Lyu and Lu Qin and Xuemin Lin and Ying Zhang
                 and Zhengping Qian and Jingren Zhou",
  title =        "Maximum and top-$k$ diversified biclique search at
                 scale",
  journal =      j-VLDB-J,
  volume =       "31",
  number =       "6",
  pages =        "1365--1389",
  month =        nov,
  year =         "2022",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00681-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 22 11:01:17 MDT 2022",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00681-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2023:HHF,
  author =       "Wei Chen and Weiqing Wang and Hongzhi Yin and Lei Zhao
                 and Xiaofang Zhou",
  title =        "{HFUL}: a hybrid framework for user account linkage
                 across location-aware social networks",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "1--22",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00730-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00730-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ding:2023:FGE,
  author =       "Zeyu Ding and Yuxin Wang and Yingtai Xiao and Guanhong
                 Wang and Danfeng Zhang and Daniel Kifer",
  title =        "Free gap estimates from the exponential mechanism,
                 sparse vector, noisy max and related algorithms",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "23--48",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00728-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00728-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fan:2023:MGC,
  author =       "Wenfei Fan and Yuanhao Li and Muyang Liu and Can Lu",
  title =        "Making graphs compact by lossless contraction",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "49--73",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00731-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00731-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lourenco:2023:BID,
  author =       "Raoni Louren{\c{c}}o and Juliana Freire and Eric Simon
                 and Gabriel Weber and Dennis Shasha",
  title =        "{BugDoc}: Iterative debugging and explanation of
                 pipeline",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "75--101",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00733-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See correction \cite{Lourenco:2023:CBI}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00733-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Pankowski:2023:ODF,
  author =       "Tadeusz Pankowski",
  title =        "Ontological databases with faceted queries",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "103--121",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00735-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00735-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Liu:2023:ZRN,
  author =       "Gang Liu and Leying Chen and Shimin Chen",
  title =        "{Zen+}: a robust {NUMA}-aware {OLTP} engine optimized
                 for non-volatile main memory",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "123--148",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00737-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00737-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fan:2023:ADG,
  author =       "Wenfei Fan and Ruiqi Xu and Qiang Yin and Wenyuan Yu
                 and Jingren Zhou",
  title =        "Application-driven graph partitioning",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "149--172",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00736-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00736-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Miao:2023:AIR,
  author =       "Dongjing Miao and Pengfei Zhang and Jianzhong Li and
                 Ye Wang and Zhipeng Cai",
  title =        "Approximation and inapproximability results on
                 computing optimal repairs",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "173--197",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00738-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00738-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Maroulis:2023:RAA,
  author =       "Stavros Maroulis and Nikos Bikakis and George
                 Papastefanatos and Panos Vassiliadis and Yannis
                 Vassiliou",
  title =        "Resource-aware adaptive indexing for in situ visual
                 exploration and analytics",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "199--227",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00739-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00739-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Huang:2023:DEM,
  author =       "Jiacheng Huang and Wei Hu and Zhifeng Bao and Qijin
                 Chen and Yuzhong Qu",
  title =        "Deep entity matching with adversarial active
                 learning",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "1",
  pages =        "229--255",
  month =        jan,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00745-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00745-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2023:ABC,
  author =       "Kai Wang and Xuemin Lin and Lu Qin and Wenjie Zhang
                 and Ying Zhang",
  title =        "Accelerated butterfly counting with vertex priority on
                 bipartite graphs",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "257--281",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00746-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00746-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Nikookar:2023:DRS,
  author =       "Sepideh Nikookar and Mohammadreza Esfandiari and Ria
                 Mae Borromeo and Paras Sakharkar and Sihem Amer-Yahia
                 and Senjuti Basu Roy",
  title =        "Diversifying recommendations on sequences of sets",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "283--304",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00740-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00740-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Piai:2023:FGS,
  author =       "Federico Piai and Paolo Atzeni and Paolo Merialdo and
                 Divesh Srivastava",
  title =        "Fine-grained semantic type discovery for heterogeneous
                 sources using clustering",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "305--324",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00743-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00743-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Liu:2023:URR,
  author =       "Hao Liu and Jindong Han and Yanjie Fu and Yanyan Li
                 and Kai Chen and Hui Xiong",
  title =        "Unified route representation learning for multi-modal
                 transportation recommendation with spatiotemporal
                 pre-training",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "325--342",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00748-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00748-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kim:2023:FSQ,
  author =       "Hyunjoon Kim and Yunyoung Choi and Kunsoo Park and
                 Xuemin Lin and Seok-Hee Hong and Wook-Shin Han",
  title =        "Fast subgraph query processing and subgraph matching
                 via static and dynamic equivalences",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "343--368",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00749-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00749-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Nguyen:2023:DRL,
  author =       "Thanh Tam Nguyen and Thanh Trung Huynh and Hongzhi Yin
                 and Matthias Weidlich and Thanh Thi Nguyen and Thai Son
                 Mai and Quoc Viet Hung Nguyen",
  title =        "Detecting rumours with latency guarantees using
                 massive streaming data",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "369--387",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00750-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00750-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2023:VSE,
  author =       "Yang Li and Yu Shen and Wentao Zhang and Ce Zhang and
                 Bin Cui",
  title =        "{VolcanoML}: speeding up end-to-end {AutoML} via
                 scalable search space decomposition",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "389--413",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00752-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00752-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Bouganim:2023:HDP,
  author =       "Luc Bouganim and Julien Loudet and Iulian Sandu Popa",
  title =        "Highly distributed and privacy-preserving queries on
                 personal data management systems",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "415--445",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00753-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00753-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2023:IGL,
  author =       "Jiazun Chen and Jun Gao and Bin Cui",
  title =        "{ICS-GNN$^+$}: lightweight interactive community
                 search via graph neural network",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "447--467",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00754-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00754-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Boniol:2023:CUS,
  author =       "Paul Boniol and Michele Linardi and Federico Roncallo
                 and Themis Palpanas and Mohammed Meftah and Emmanuel
                 Remy",
  title =        "Correction to: {Unsupervised} and scalable subsequence
                 anomaly detection in large data series",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "469--469",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00678-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Boniol:2021:USS}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00678-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Kossmann:2023:CDD,
  author =       "Jan Kossmann and Thorsten Papenbrock and Felix
                 Naumann",
  title =        "Correction to: Data dependencies for query
                 optimization: a survey",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "471--471",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-021-00710-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Kossmann:2022:DDQ}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-021-00710-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lourenco:2023:CBI,
  author =       "Raoni Louren{\c{c}}o and Juliana Freire and Eric Simon
                 and Gabriel Weber and Dennis Shasha",
  title =        "Correction to: {BugDoc} Iterative debugging and
                 explanation of pipeline executions",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "2",
  pages =        "473--473",
  month =        mar,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00751-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Feb 25 08:12:25 MST 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Lourenco:2023:BID}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00751-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Azzalini:2023:EDA,
  author =       "Fabio Azzalini and Davide Piantella and Emanuele
                 Rabosio and Letizia Tanca",
  title =        "Enhancing domain-aware multi-truth data fusion using
                 copy-based source authority and value similarity",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "3",
  pages =        "475--500",
  month =        may,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00757-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 21 10:46:50 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00757-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ahmed:2023:RST,
  author =       "Pritom Ahmed and Ahmed Eldawy and Vagelis Hristidis
                 and Vassilis J. Tsotras",
  title =        "Reverse spatial top-$k$ keyword queries",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "3",
  pages =        "501--524",
  month =        may,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00759-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 21 10:46:50 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00759-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2023:POB,
  author =       "Meng Li and Rongbiao Xie and Deyi Chen and Haipeng Dai
                 and Rong Gu and He Huang and Wanchun Dou and Guihai
                 Chen",
  title =        "A {Pareto} optimal {Bloom} filter family with hash
                 adaptivity",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "3",
  pages =        "525--548",
  month =        may,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00755-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 21 10:46:50 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00755-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lei:2023:HDP,
  author =       "Chuan Lei and Abdul Quamar and Vasilis Efthymiou and
                 Fatma {\"O}zcan and Rana Alotaibi",
  title =        "{HERMES}: data placement and schema optimization for
                 enterprise knowledge bases",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "3",
  pages =        "549--574",
  month =        may,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00756-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 21 10:46:50 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00756-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2023:ENQ,
  author =       "Jiajia Li and Cancan Ni and Dan He and Lei Li and
                 Xiufeng Xia and Xiaofang Zhou",
  title =        "Efficient $k$ {NN} query for moving objects on
                 time-dependent road networks",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "3",
  pages =        "575--594",
  month =        may,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00758-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 21 10:46:50 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00758-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Liu:2023:MCS,
  author =       "Ziyi Liu and Lei Li and Mengxuan Zhang and Wen Hua and
                 Xiaofang Zhou",
  title =        "Multi-constraint shortest path using forest hop
                 labeling",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "3",
  pages =        "595--621",
  month =        may,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00760-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 21 10:46:50 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00760-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2023:LBQ,
  author =       "Pengcheng Zhang and Bin Yao and Chao Gao and Bin Wu
                 and Xiao He and Feifei Li and Yuanfei Lu and Chaoqun
                 Zhan and Feilong Tang",
  title =        "Learning-based query optimization for multi-probe
                 approximate nearest neighbor search",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "3",
  pages =        "623--645",
  month =        may,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00762-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 21 10:46:50 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00762-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Luo:2023:TMH,
  author =       "Qi Luo and Dongxiao Yu and Zhipeng Cai and Xuemin Lin
                 and Guanghui Wang and Xiuzhen Cheng",
  title =        "Toward maintenance of hypercores in large-scale
                 dynamic hypergraphs",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "3",
  pages =        "647--664",
  month =        may,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00763-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 21 10:46:50 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00763-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2023:PPB,
  author =       "Liang Zhang and Noura Alghamdi and Huayi Zhang and
                 Mohamed Y. Eltabakh and Elke A. Rundensteiner",
  title =        "{PARROT}: pattern-based correlation exploitation in
                 big partitioned data series",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "3",
  pages =        "665--688",
  month =        may,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00767-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Apr 21 10:46:50 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00767-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wellenzohn:2023:RSC,
  author =       "Kevin Wellenzohn and Michael H. B{\"o}hlen and Sven
                 Helmer and Antoine Pietri and Stefano Zacchiroli",
  title =        "Robust and scalable content-and-structure indexing",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "4",
  pages =        "689--715",
  month =        jul,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00764-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jun 1 08:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00764-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Miao:2023:PPP,
  author =       "Xupeng Miao and Wentao Zhang and Yuezihan Jiang and
                 Fangcheng Fu and Yingxia Shao and Lei Chen and Yangyu
                 Tao and Gang Cao and Bin Cui",
  title =        "{P$^2$CG}: a privacy preserving collaborative graph
                 neural network training framework",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "4",
  pages =        "717--736",
  month =        jul,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00768-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jun 1 08:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00768-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Islam:2023:GFE,
  author =       "Md Mouinul Islam and Mahsa Asadi and Sihem Amer-Yahia
                 and Senjuti Basu Roy",
  title =        "A generic framework for efficient computation of
                 top-$k$ diverse results",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "4",
  pages =        "737--761",
  month =        jul,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00770-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jun 1 08:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00770-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Echihabi:2023:PDS,
  author =       "Karima Echihabi and Theophanis Tsandilas and Anna
                 Gogolou and Anastasia Bezerianos and Themis Palpanas",
  title =        "{ProS}: data series progressive $k$-{NN} similarity
                 search and classification with probabilistic quality
                 guarantees",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "4",
  pages =        "763--789",
  month =        jul,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00771-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jun 1 08:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00771-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Whang:2023:DCQ,
  author =       "Steven Euijong Whang and Yuji Roh and Hwanjun Song and
                 Jae-Gil Lee",
  title =        "Data collection and quality challenges in deep
                 learning: a data-centric {AI} perspective",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "4",
  pages =        "791--813",
  month =        jul,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00775-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jun 1 08:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00775-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Lou:2023:TTA,
  author =       "Yunkai Lou and Chaokun Wang and Tiankai Gu and Hao
                 Feng and Jun Chen and Jeffrey Xu Yu",
  title =        "Time-topology analysis on temporal graphs",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "4",
  pages =        "815--843",
  month =        jul,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00772-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jun 1 08:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00772-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ntroumpogiannis:2023:MLA,
  author =       "Antonios Ntroumpogiannis and Michail Giannoulis and
                 Nikolaos Myrtakis and Vassilis Christophides and Eric
                 Simon and Ioannis Tsamardinos",
  title =        "A meta-level analysis of online anomaly detectors",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "4",
  pages =        "845--886",
  month =        jul,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00773-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jun 1 08:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00773-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2023:SSQ,
  author =       "Dongxiang Zhang and Zhihao Chang and Dingyu Yang and
                 Dongsheng Li and Kian-Lee Tan and Ke Chen and Gang
                 Chen",
  title =        "{SQUID}: subtrajectory query in trillion-scale {GPS}
                 database",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "4",
  pages =        "887--904",
  month =        jul,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00777-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jun 1 08:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00777-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Katsogiannis-Meimarakis:2023:SDL,
  author =       "George Katsogiannis-Meimarakis and Georgia Koutrika",
  title =        "A survey on deep learning approaches for
                 text-to-{SQL}",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "4",
  pages =        "905--936",
  month =        jul,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00776-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Thu Jun 1 08:33:00 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00776-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhao:2023:LSS,
  author =       "Kangfei Zhao and Jeffrey Xu Yu and Qiyan Li and Hao
                 Zhang and Yu Rong",
  title =        "Learned sketch for subgraph counting: a holistic
                 approach",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "5",
  pages =        "937--962",
  month =        sep,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00781-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Aug 18 07:36:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00781-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yamada:2023:ALT,
  author =       "Masaya Yamada and Hiroyuki Kitagawa and Toshiyuki
                 Amagasa and Akiyoshi Matono",
  title =        "Augmented lineage: traceability of data analysis
                 including complex {UDF} processing",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "5",
  pages =        "963--983",
  month =        sep,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00769-7",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Aug 18 07:36:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00769-7",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Verwiebe:2023:SWT,
  author =       "Juliane Verwiebe and Philipp M. Grulich and Jonas
                 Traub and Volker Markl",
  title =        "Survey of window types for aggregation in stream
                 processing systems",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "5",
  pages =        "985--1011",
  month =        sep,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00778-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Aug 18 07:36:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Verwiebe:2024:CSW}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00778-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhou:2023:BCB,
  author =       "Alexander Zhou and Yue Wang and Lei Chen",
  title =        "Butterfly counting and bitruss decomposition on
                 uncertain bipartite graphs",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "5",
  pages =        "1013--1036",
  month =        sep,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00782-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Aug 18 07:36:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00782-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hirsch:2023:EDK,
  author =       "Vitali Hirsch and Peter Reimann and Dennis
                 Treder-Tschechlov and Holger Schwarz and Bernhard
                 Mitschang",
  title =        "Exploiting domain knowledge to address class imbalance
                 and a heterogeneous feature space in multi-class
                 classification",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "5",
  pages =        "1037--1064",
  month =        sep,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00780-6",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Aug 18 07:36:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00780-6",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Xu:2023:LUI,
  author =       "Jia Xu and Zulong Chen and Wanjie Tao and Ziyi Wang
                 and Detao Lv and Yao Yu and Chuanfei Xu",
  title =        "Leveraging user itinerary to improve personalized deep
                 matching at {Fliggy}",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "5",
  pages =        "1065--1086",
  month =        sep,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00787-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Aug 18 07:36:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00787-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Gou:2023:SWB,
  author =       "Xiangyang Gou and Lei Zou",
  title =        "Sliding window-based approximate triangle counting
                 with bounded memory usage",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "5",
  pages =        "1087--1110",
  month =        sep,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00783-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Aug 18 07:36:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00783-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Schiavio:2023:DDQ,
  author =       "Filippo Schiavio and Daniele Bonetta and Walter
                 Binder",
  title =        "{DynQ}: a dynamic query engine with query-reuse
                 capabilities embedded in a polyglot runtime",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "5",
  pages =        "1111--1135",
  month =        sep,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00784-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Aug 18 07:36:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00784-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2023:BCE,
  author =       "Jianye Yang and Yun Peng and Dian Ouyang and Wenjie
                 Zhang and Xuemin Lin and Xiang Zhao",
  title =        "$ (p, q)$-biclique counting and enumeration for large
                 sparse bipartite graphs",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "5",
  pages =        "1137--1161",
  month =        sep,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00786-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Fri Aug 18 07:36:55 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00786-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Naumann:2023:ESI,
  author =       "Felix Naumann and Xin Luna Dong",
  title =        "Editorial: Special Issue for Selected Papers of {VLDB
                 2021}",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1163--1163",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00792-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00792-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fent:2023:PPE,
  author =       "Philipp Fent and Altan Birler and Thomas Neumann",
  title =        "Practical planning and execution of groupjoin and
                 nested aggregates",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1165--1190",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00765-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00765-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Farias:2023:LDD,
  author =       "Victor A. E. Farias and Felipe T. Brito and Cheryl
                 Flynn and Javam C. Machado and Subhabrata Majumdar and
                 Divesh Srivastava",
  title =        "Local dampening: differential privacy for non-numeric
                 queries via local sensitivity",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1191--1214",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-022-00774-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-022-00774-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Li:2023:EEM,
  author =       "Yuliang Li and Jinfeng Li and Yoshi Suhara and AnHai
                 Doan and Wang-Chiew Tan",
  title =        "Effective entity matching with transformers",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1215--1235",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00779-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00779-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2023:PSE,
  author =       "Renchi Yang and Jieming Shi and Xiaokui Xiao and Yin
                 Yang and Sourav S. Bhowmick and Juncheng Liu",
  title =        "{PANE}: scalable and effective attributed network
                 embedding",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1237--1262",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00790-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00790-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ouyang:2023:WHM,
  author =       "Dian Ouyang and Dong Wen and Lu Qin and Lijun Chang
                 and Xuemin Lin and Ying Zhang",
  title =        "When hierarchy meets 2-hop-labeling: efficient
                 shortest distance and path queries on road networks",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1263--1287",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00789-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00789-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Qian:2023:IDD,
  author =       "Chaoqin Qian and Menglu Li and Zijing Tan and Ai Ran
                 and Shuai Ma",
  title =        "Incremental discovery of denial constraints",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1289--1313",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00788-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00788-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2023:TGC,
  author =       "Zuozhi Wang and Kai Zeng and Botong Huang and Wei Chen
                 and Xiaozong Cui and Bo Wang and Ji Liu and Liya Fan
                 and Dachuan Qu and Zhenyu Hou and Tao Guan and Chen Li
                 and Jingren Zhou",
  title =        "{Tempura}: a general cost-based optimizer framework
                 for incremental data processing (Journal Version)",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1315--1342",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00785-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00785-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Hellings:2023:BSB,
  author =       "Jelle Hellings and Mohammad Sadoghi",
  title =        "{ByShard}: sharding in a {Byzantine} environment",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1343--1367",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00794-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00794-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Papadakis:2023:AOO,
  author =       "George Papadakis and Vasilis Efthymiou and Emmanouil
                 Thanos and Oktie Hassanzadeh and Peter Christen",
  title =        "An analysis of one-to-one matching algorithms for
                 entity resolution",
  journal =      j-VLDB-J,
  volume =       "32",
  number =       "6",
  pages =        "1369--1400",
  month =        nov,
  year =         "2023",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00791-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Sat Oct 21 08:56:16 MDT 2023",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00791-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2024:SDC,
  author =       "Haoyu Wang and Aoqian Zhang and Shaoxu Song and
                 Jianmin Wang",
  title =        "Streaming data cleaning based on speed change",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "1--24",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00796-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00796-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Aghasadeghi:2024:TGP,
  author =       "Amir Aghasadeghi and Jan {Van den Bussche} and Julia
                 Stoyanovich",
  title =        "Temporal graph patterns by timed automata",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "25--47",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00795-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00795-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chang:2024:NOA,
  author =       "Lijun Chang and Zhiyi Wang",
  title =        "A near-optimal approach to edge connectivity-based
                 hierarchical graph decomposition",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "49--71",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00797-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00797-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Christodoulou:2024:HHI,
  author =       "George Christodoulou and Panagiotis Bouros and Nikos
                 Mamoulis",
  title =        "{HINT}: a hierarchical interval index for {Allen}
                 relationships",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "73--100",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00798-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00798-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Farhan:2024:BBD,
  author =       "Muhammad Farhan and Henning Koehler and Qing Wang",
  title =        "{BatchHL$^+$}: batch dynamic labelling for distance
                 queries on large-scale networks",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "101--129",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00799-9",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00799-9",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Meilicke:2024:ABR,
  author =       "Christian Meilicke and Melisachew Wudage Chekol and
                 Patrick Betz and Manuel Fink and Heiner Stuckenschmidt",
  title =        "Anytime bottom-up rule learning for large-scale
                 knowledge graph completion",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "131--161",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00800-5",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00800-5",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhao:2024:CBT,
  author =       "Yan Zhao and Kai Zheng and Ziwei Wang and Liwei Deng
                 and Bin Yang and Torben Bach Pedersen and Christian S.
                 Jensen and Xiaofang Zhou",
  title =        "Coalition-based task assignment with priority-aware
                 fairness in spatial crowdsourcing",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "163--184",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00802-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00802-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Shaham:2024:SSD,
  author =       "Sina Shaham and Gabriel Ghinita and Cyrus Shahabi",
  title =        "Supporting secure dynamic alert zones using searchable
                 encryption and graph embedding",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "185--206",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00803-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/cryptography2020.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00803-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Ma:2024:ADD,
  author =       "Chenhao Ma and Yixiang Fang and Reynold Cheng and Laks
                 V. S. Lakshmanan and Xiaolin Han and Xiaodong Li",
  title =        "Accelerating directed densest subgraph queries with
                 software and hardware approaches",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "207--230",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00805-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00805-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Mouratidis:2024:QCD,
  author =       "Kyriakos Mouratidis and Keming Li and Bo Tang",
  title =        "Quantifying the competitiveness of a dataset in
                 relation to general preferences",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "231--250",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00804-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00804-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Verwiebe:2024:CSW,
  author =       "Juliane Verwiebe and Philipp M. Grulich and Jonas
                 Traub and Volker Markl",
  title =        "Correction to: {Survey} of window types for
                 aggregation in stream processing systems",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "251--251",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00793-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Verwiebe:2023:SWT}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00793-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Yang:2024:CTF,
  author =       "Fan Yang and Faisal M. Almutairi and Hyun Ah Song and
                 Christos Faloutsos and Nicholas D. Sidiropoulos and
                 Vladimir Zadorozhny",
  title =        "Correction to: {TurboLift}: fast accuracy lifting for
                 historical data recovery",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "1",
  pages =        "253--253",
  month =        jan,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00801-4",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  note =         "See \cite{Yang:2020:TFA}.",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00801-4",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Liu:2024:TDS,
  author =       "Tongyu Liu and Ju Fan and Guoliang Li and Nan Tang and
                 Xiaoyong Du",
  title =        "Tabular data synthesis with generative adversarial
                 networks: design space and optimizations",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "255--280",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00807-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00807-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Karpov:2024:MFA,
  author =       "Nikolai Karpov and Haoyu Zhang and Qin Zhang",
  title =        "{MinJoin++}: a fast algorithm for string similarity
                 joins under edit distance",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "281--299",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00806-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00806-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Usta:2024:XEN,
  author =       "Arif Usta and Akifhan Karakayali and {\"O}zg{\"u}r
                 Ulusoy",
  title =        "{xDBTagger}: explainable natural language interface to
                 databases using keyword mappings and schema graph",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "301--321",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00809-w",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00809-w",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Wang:2024:CEU,
  author =       "Jiayi Wang and Chengliang Chai and Jiabin Liu and
                 Guoliang Li",
  title =        "Cardinality estimation using normalizing flow",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "323--348",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00808-x",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00808-x",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Arroyuelo:2024:ORC,
  author =       "Diego Arroyuelo and Adri{\'a}n G{\'o}mez-Brand{\'o}n
                 and Aidan Hogan and Gonzalo Navarro and Javiel
                 Rojas-Ledesma",
  title =        "Optimizing {RPQs} over a compact graph
                 representation",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "349--374",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00811-2",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00811-2",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Chen:2024:QEP,
  author =       "Zhiwen Chen and Daokun Hu and Wenkui Che and Jianhua
                 Sun and Hao Chen",
  title =        "A quantitative evaluation of persistent memory hash
                 indexes",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "375--397",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00812-1",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/hash.bib;
                 https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00812-1",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Abello:2024:EEM,
  author =       "Alberto Abell{\'o} and James Cheney",
  title =        "{Eris}: efficiently measuring discord in
                 multidimensional sources",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "399--423",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00810-3",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00810-3",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Jiang:2024:SEM,
  author =       "Jiawei Jiang and Shaoduo Gan and Bo Du and Gustavo
                 Alonso and Ana Klimovic and Ankit Singla and Wentao Wu
                 and Sheng Wang and Ce Zhang",
  title =        "A systematic evaluation of machine learning on
                 serverless infrastructure",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "425--449",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00813-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00813-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhang:2024:STS,
  author =       "Shuhao Zhang and Juan Soto and Volker Markl",
  title =        "A survey on transactional stream processing",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "451--479",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00814-z",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00814-z",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{dHondt:2024:EDM,
  author =       "Jens E. d'Hondt and Koen Minartz and Odysseas
                 Papapetrou",
  title =        "Efficient detection of multivariate correlations with
                 different correlation measures",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "481--505",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00815-y",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00815-y",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Fragkoulis:2024:SES,
  author =       "Marios Fragkoulis and Paris Carbone and Vasiliki
                 Kalavri and Asterios Katsifodimos",
  title =        "A survey on the evolution of stream processing
                 systems",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "507--541",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00819-8",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00819-8",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

@Article{Zhao:2024:RRE,
  author =       "Hongyao Zhao and Jingyao Li and Wei Lu and Qian Zhang
                 and Wanqing Yang and Jiajia Zhong and Meihui Zhang and
                 Haixiang Li and Xiaoyong Du and Anqun Pan",
  title =        "{RCBench}: an {RDMA}-enabled transaction framework for
                 analyzing concurrency control algorithms",
  journal =      j-VLDB-J,
  volume =       "33",
  number =       "2",
  pages =        "543--567",
  month =        mar,
  year =         "2024",
  CODEN =        "VLDBFR",
  DOI =          "https://doi.org/10.1007/s00778-023-00821-0",
  ISSN =         "1066-8888 (print), 0949-877X (electronic)",
  ISSN-L =       "1066-8888",
  bibdate =      "Tue Mar 19 08:11:52 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/vldbj.bib",
  URL =          "https://link.springer.com/article/10.1007/s00778-023-00821-0",
  acknowledgement = ack-nhfb,
  ajournal =     "VLDB J.",
  fjournal =     "VLDB Journal: Very Large Data Bases",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J869",
}

%% TO DO: [10-Nov-2016] v23n2 (spring 2014) is STILL missing at ACM Web
%% site, but data have been found at Springer Web site and converted to
%% BibTeX, so coverage is complete again.
%% Reported to ACM on [18-Sep-2015] via portal-feedback<at>hq.acm.org.