%%% -*-BibTeX-*-
%%% ====================================================================
%%% BibTeX-file{
%%%     author          = "Nelson H. F. Beebe",
%%%     version         = "1.58",
%%%     date            = "15 January 2024",
%%%     time            = "11:09:04 MST",
%%%     filename        = "tkdd.bib",
%%%     address         = "University of Utah
%%%                        Department of Mathematics, 110 LCB
%%%                        155 S 1400 E RM 233
%%%                        Salt Lake City, UT 84112-0090
%%%                        USA",
%%%     telephone       = "+1 801 581 5254",
%%%     FAX             = "+1 801 581 4148",
%%%     URL             = "http://www.math.utah.edu/~beebe",
%%%     checksum        = "14709 38993 203820 1914094",
%%%     email           = "beebe at math.utah.edu, beebe at acm.org,
%%%                        beebe at computer.org (Internet)",
%%%     codetable       = "ISO/ASCII",
%%%     keywords        = "ACM Transactions on Knowledge Discovery from
%%%                        Data (TKDD); bibliography; TKDD",
%%%     license         = "public domain",
%%%     supported       = "yes",
%%%     docstring       = "This is a COMPLETE BibTeX bibliography for
%%%                        ACM Transactions on Knowledge Discovery from
%%%                        Data (TKDD) (CODEN ????, ISSN 1556-4681),
%%%                        covering all journal issues from 2007 --
%%%                        date.
%%%
%%%                        At version 1.58, the COMPLETE journal
%%%                        coverage looked like this:
%%%
%%%                             2007 (  14)    2013 (  20)    2019 (  65)
%%%                             2008 (  18)    2014 (  37)    2020 (  78)
%%%                             2009 (  25)    2015 (  41)    2021 ( 112)
%%%                             2010 (  26)    2016 (  54)    2022 ( 126)
%%%                             2011 (  11)    2017 (  27)    2023 ( 137)
%%%                             2012 (  26)    2018 (  73)    2024 (  70)
%%%
%%%                             Article:        960
%%%
%%%                             Total entries:  960
%%%
%%%                        The journal Web page can be found at:
%%%
%%%                            http://www.acm.org/pubs/tkdd.html
%%%
%%%                        The journal table of contents page is at:
%%%
%%%                            http://www.acm.org/tkdd/
%%%                            http://portal.acm.org/browse_dl.cfm?idx=J1054
%%%
%%%                        Qualified subscribers can retrieve the full
%%%                        text of recent articles in PDF form.
%%%
%%%                        The initial draft was extracted from the ACM
%%%                        Web pages.
%%%
%%%                        ACM copyrights explicitly permit abstracting
%%%                        with credit, so article abstracts, keywords,
%%%                        and subject classifications have been
%%%                        included in this bibliography wherever
%%%                        available.  Article reviews have been
%%%                        omitted, until their copyright status has
%%%                        been clarified.
%%%
%%%                        bibsource keys in the bibliography entries
%%%                        below indicate the entry originally came
%%%                        from the computer science bibliography
%%%                        archive, even though it has likely since
%%%                        been corrected and updated.
%%%
%%%                        URL keys in the bibliography point to
%%%                        World Wide Web locations of additional
%%%                        information about the entry.
%%%
%%%                        BibTeX citation tags are uniformly chosen
%%%                        as name:year:abbrev, where name is the
%%%                        family name of the first author or editor,
%%%                        year is a 4-digit number, and abbrev is a
%%%                        3-letter condensation of important title
%%%                        words. Citation tags were automatically
%%%                        generated by software developed for the
%%%                        BibNet Project.
%%%
%%%                        In this bibliography, entries are sorted in
%%%                        publication order, using ``bibsort -byvolume.''
%%%
%%%                        The checksum field above contains a CRC-16
%%%                        checksum as the first value, followed by the
%%%                        equivalent of the standard UNIX wc (word
%%%                        count) utility output of lines, words, and
%%%                        characters.  This is produced by Robert
%%%                        Solovay's checksum utility."
%%%     }
%%% ====================================================================

%%% Preamble: TeX code that BibTeX passes through to the generated
%%% bibliography.  The three concatenated strings (1) input
%%% bibnames.sty, (2) define \TM as a superscript small-caps ``TM''
%%% mark, and (3) define \bioname as a macro that emphasizes its
%%% argument, but only when \bioname is not already defined.
@Preamble{"\input bibnames.sty" #
    "\def \TM {${}^{\sc TM}$}" #
    "\ifx \undefined \bioname      \def \bioname#1{{{\em #1\/}}} \fi"
}

%%% ====================================================================
%%% Acknowledgement abbreviations:

%%% ack-nhfb: postal, telephone, e-mail, and Web contact details for
%%% Nelson H. F. Beebe.  Entries below use this macro as the value of
%%% their acknowledgement field.  E-mail addresses and the URL are
%%% wrapped in \path|...|.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|http://www.math.utah.edu/~beebe/|"}

%%% ====================================================================
%%% Journal abbreviations:

%%% j-TKDD: the full journal title, including the (TKDD) acronym.
%%% Entries below use this macro as the value of their journal field,
%%% so the title can be changed in one place.
@String{j-TKDD                  = "ACM Transactions on Knowledge
                                  Discovery from Data (TKDD)"}

%%% ====================================================================
%%% Bibliography entries:

%%% TKDD 1(1), article 1 (March 2007): introduction by Jiawei Han.
%%% The record carries no abstract or keywords.  The pages field has
%%% the form articleno:first--articleno:?? (the ?? presumably marks an
%%% unknown final page -- confirm against the journal).
@Article{Han:2007:I,
  author =       "Jiawei Han",
  title =        "Introduction",
  journal =      j-TKDD,
  volume =       "1",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1217299.1217300",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:36 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD 1(1), article 2 (March 2007): graph densification, shrinking
%%% diameters, and a forest-fire graph generator.  The abstract has
%%% five paragraphs, separated by \par followed by a blank line.
@Article{Leskovec:2007:GED,
  author =       "Jure Leskovec and Jon Kleinberg and Christos
                 Faloutsos",
  title =        "Graph evolution: {Densification} and shrinking
                 diameters",
  journal =      j-TKDD,
  volume =       "1",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1217299.1217301",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:36 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "How do real graphs evolve over time? What are normal
                 growth patterns in social, technological, and
                 information networks? Many studies have discovered
                 patterns in {\em static graphs}, identifying properties
                 in a single snapshot of a large network or in a very
                 small number of snapshots; these include heavy tails
                 for in- and out-degree distributions, communities,
                 small-world phenomena, and others. However, given the
                 lack of information about network evolution over long
                 periods, it has been hard to convert these findings
                 into statements about trends over time.\par

                 Here we study a wide range of real graphs, and we
                 observe some surprising phenomena. First, most of these
                 graphs densify over time with the number of edges
                 growing superlinearly in the number of nodes. Second,
                 the average distance between nodes often shrinks over
                 time in contrast to the conventional wisdom that such
                 distance parameters should increase slowly as a
                 function of the number of nodes (like $ O(\log n) $ or
                 $ O(\log (\log n))$).\par

                 Existing graph generation models do not exhibit these
                 types of behavior even at a qualitative level. We
                 provide a new graph generator, based on a forest fire
                 spreading process that has a simple, intuitive
                 justification, requires very few parameters (like the
                 flammability of nodes), and produces graphs exhibiting
                 the full range of properties observed both in prior
                 work and in the present study.\par

                 We also notice that the forest fire model exhibits a
                 sharp transition between sparse graphs and graphs that
                 are densifying. Graphs with decreasing distance between
                 the nodes are generated around this transition
                 point.\par

                 Last, we analyze the connection between the temporal
                 evolution of the degree distribution and densification
                 of a graph. We find that the two are fundamentally
                 related. We also observe that real networks exhibit
                 this type of relation between densification and the
                 degree distribution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Densification power laws; graph generators; graph
                 mining; heavy-tailed distributions; small-world
                 phenomena",
}

%%% TKDD 1(1), article 3 (March 2007): the l-diversity privacy
%%% criterion as a defence against attacks on k-anonymized data.
%%% Both math fragments in the title are brace-protected so that
%%% styles which recase titles leave them untouched.  The keywords
%%% write the two notation-based terms in math mode, matching the
%%% abstract ($ \ell $-diversity and $k$-anonymity).
@Article{Machanavajjhala:2007:DPB,
  author =       "Ashwin Machanavajjhala and Daniel Kifer and Johannes
                 Gehrke and Muthuramakrishnan Venkitasubramaniam",
  title =        "{$L$}-diversity: {Privacy} beyond {$k$}-anonymity",
  journal =      j-TKDD,
  volume =       "1",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1217299.1217302",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:36 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Publishing data about individuals without revealing
                 sensitive information about them is an important
                 problem. In recent years, a new definition of privacy
                 called $k$-anonymity has gained popularity. In a
                 $k$-anonymized dataset, each record is
                 indistinguishable from at least $ k - 1$ other records
                 with respect to certain identifying attributes.\par

                 In this article, we show using two simple attacks that
                 a $k$-anonymized dataset has some subtle but severe
                 privacy problems. First, an attacker can discover the
                 values of sensitive attributes when there is little
                 diversity in those sensitive attributes. This is a
                 known problem. Second, attackers often have background
                 knowledge, and we show that $k$-anonymity does not
                 guarantee privacy against attackers using background
                 knowledge. We give a detailed analysis of these two
                 attacks, and we propose a novel and powerful privacy
                 criterion called $ \ell $-diversity that can defend
                 against such attacks. In addition to building a formal
                 foundation for $ \ell $-diversity, we show in an
                 experimental evaluation that $ \ell $-diversity is
                 practical and can be implemented efficiently.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "$ \ell $-diversity; Data privacy; $k$-anonymity;
                 privacy-preserving data publishing",
}

%%% TKDD 1(1), article 4 (March 2007): the clustering aggregation
%%% problem and its connection to correlation clustering.  The
%%% abstract has two paragraphs, separated by \par and a blank line.
@Article{Gionis:2007:CA,
  author =       "Aristides Gionis and Heikki Mannila and Panayiotis
                 Tsaparas",
  title =        "Clustering aggregation",
  journal =      j-TKDD,
  volume =       "1",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1217299.1217303",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:36 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We consider the following problem: given a set of
                 clusterings, find a single clustering that agrees as
                 much as possible with the input clusterings. This
                 problem, {\em clustering aggregation}, appears
                 naturally in various contexts. For example, clustering
                 categorical data is an instance of the clustering
                 aggregation problem; each categorical attribute can be
                 viewed as a clustering of the input rows where rows are
                 grouped together if they take the same value on that
                 attribute. Clustering aggregation can also be used as a
                 metaclustering method to improve the robustness of
                 clustering by combining the output of multiple
                 algorithms. Furthermore, the problem formulation does
                 not require a priori information about the number of
                 clusters; it is naturally determined by the
                 optimization function.\par

                 In this article, we give a formal statement of the
                 clustering aggregation problem, and we propose a number
                 of algorithms. Our algorithms make use of the
                 connection between clustering aggregation and the
                 problem of {\em correlation clustering}. Although the
                 problems we consider are NP-hard, for several of our
                 methods, we provide theoretical guarantees on the
                 quality of the solutions. Our work provides the best
                 deterministic approximation algorithm for the variation
                 of the correlation clustering problem we consider. We
                 also show how sampling can be used to scale the
                 algorithms for large datasets. We give an extensive
                 empirical evaluation demonstrating the usefulness of
                 the problem and of the solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "clustering aggregation; clustering categorical data;
                 correlation clustering; Data clustering",
}

%%% TKDD 1(1), article 5 (March 2007): collective entity resolution
%%% via a relational clustering algorithm that uses both attribute
%%% and relational information.  The abstract is a single paragraph.
@Article{Bhattacharya:2007:CER,
  author =       "Indrajit Bhattacharya and Lise Getoor",
  title =        "Collective entity resolution in relational data",
  journal =      j-TKDD,
  volume =       "1",
  number =       "1",
  pages =        "5:1--5:??",
  month =        mar,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1217299.1217304",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:36 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Many databases contain uncertain and imprecise
                 references to real-world entities. The absence of
                 identifiers for the underlying entities often results
                 in a database which contains multiple references to the
                 same entity. This can lead not only to data redundancy,
                 but also inaccuracies in query processing and knowledge
                 extraction. These problems can be alleviated through
                 the use of {\em entity resolution}. Entity resolution
                 involves discovering the underlying entities and
                 mapping each database reference to these entities.
                 Traditionally, entities are resolved using pairwise
                 similarity over the attributes of references. However,
                 there is often additional relational information in the
                 data. Specifically, references to different entities
                 may cooccur. In these cases, collective entity
                 resolution, in which entities for cooccurring
                 references are determined jointly rather than
                 independently, can improve entity resolution accuracy.
                 We propose a novel relational clustering algorithm that
                 uses both attribute and relational information for
                 determining the underlying domain entities, and we give
                 an efficient implementation. We investigate the impact
                 that different relational similarity measures have on
                 entity resolution quality. We evaluate our collective
                 entity resolution algorithm on multiple real-world
                 databases. We show that it improves entity resolution
                 performance over both attribute-based baselines and
                 over algorithms that consider relational information
                 but do not resolve entities collectively. In addition,
                 we perform detailed experiments on synthetically
                 generated data to identify data characteristics that
                 favor collective relational resolution over purely
                 attribute-based algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "data cleaning; Entity resolution; graph clustering;
                 record linkage",
}

%%% TKDD 1(2), article 6 (August 2007): ways to curtail extrapolation
%%% and interpolation errors when a linear model tree is used for
%%% prediction.  First entry of issue 2 in this file.
@Article{Loh:2007:EEL,
  author =       "Wei-Yin Loh and Chien-Wei Chen and Wei Zheng",
  title =        "Extrapolation errors in linear model trees",
  journal =      j-TKDD,
  volume =       "1",
  number =       "2",
  pages =        "6:1--6:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1267066.1267067",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:48 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Prediction errors from a linear model tend to be
                 larger when extrapolation is involved, particularly
                 when the model is wrong. This article considers the
                 problem of extrapolation and interpolation errors when
                 a linear model tree is used for prediction. It proposes
                 several ways to curtail the size of the errors, and
                 uses a large collection of real datasets to demonstrate
                 that the solutions are effective in reducing the
                 average mean squared prediction error. The article also
                 provides a proof that, if a linear model is correct,
                 the proposed solutions have no undesirable effects as
                 the training sample size tends to infinity.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Decision tree; prediction; regression; statistics",
}

%%% TKDD 1(2), article 7 (August 2007): mining frequently occurring
%%% periodic patterns with a gap requirement from sequences.  Symbols
%%% in the abstract (S, L, P, l) are set in math mode.
@Article{Zhang:2007:MPP,
  author =       "Minghua Zhang and Ben Kao and David W. Cheung and
                 Kevin Y. Yip",
  title =        "Mining periodic patterns with gap requirement from
                 sequences",
  journal =      j-TKDD,
  volume =       "1",
  number =       "2",
  pages =        "7:1--7:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1267066.1267068",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:48 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We study a problem of mining frequently occurring
                 periodic patterns with a gap requirement from
                 sequences. Given a character sequence $S$ of length $L$
                 and a pattern $P$ of length $l$, we consider $P$ a
                 frequently occurring pattern in $S$ if the probability
                 of {\em observing\/} $P$ given a randomly picked
                 length-$l$ subsequence of $S$ exceeds a certain
                 threshold. In many applications, particularly those
                 related to bioinformatics, interesting patterns are
                 {\em periodic\/} with a {\em gap requirement}. That is
                 to say, the characters in $P$ should match subsequences
                 of $S$ in such a way that the matching characters in
                 $S$ are separated by gaps of more or less the same
                 size. We show the complexity of the mining problem and
                 discuss why traditional mining algorithms are
                 computationally infeasible. We propose practical
                 algorithms for solving the problem and study their
                 characteristics. We also present a case study in which
                 we apply our algorithms on some DNA sequences. We
                 discuss some interesting patterns obtained from the
                 case study.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "gap requirement; periodic pattern; Sequence mining",
}

%%% TKDD 1(2), article 8 (August 2007): the Twain algorithm for mining
%%% temporal association rules with precise frequent exhibition
%%% periods.  The inequality on k in the abstract is written entirely
%%% in math mode, because a bare greater-than sign in text mode does
%%% not typeset as that symbol under the default OT1 font encoding.
@Article{Huang:2007:TTE,
  author =       "Jen-Wei Huang and Bi-Ru Dai and Ming-Syan Chen",
  title =        "{Twain}: {Two-end} association miner with precise
                 frequent exhibition periods",
  journal =      j-TKDD,
  volume =       "1",
  number =       "2",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1267066.1267069",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:48 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We investigate the general model of mining
                 associations in a temporal database, where the
                 exhibition periods of items are allowed to be different
                 from one to another. The database is divided into
                 partitions according to the time granularity imposed.
                 Such temporal association rules allow us to observe
                 short-term but interesting patterns that are absent
                 when the whole range of the database is evaluated
                 altogether. Prior work may omit some temporal
                 association rules and thus have limited practicability.
                 To remedy this and to give more precise frequent
                 exhibition periods of frequent temporal itemsets, we
                 devise an efficient algorithm {\em Twain\/} (standing
                 for {\em TWo end AssocIation miNer\/}). {\em Twain\/}
                 not only generates frequent patterns with more precise
                 frequent exhibition periods, but also discovers more
                 interesting frequent patterns. {\em Twain\/} employs
                 Start time and End time of each item to provide precise
                 frequent exhibition period while progressively handling
                 itemsets from one partition to another. Along with one
                 scan of the database, {\em Twain\/} can generate
                 frequent 2-itemsets directly according to the
                 cumulative filtering threshold. Then, {\em Twain\/}
                 adopts the scan reduction technique to generate all
                 frequent $k$-itemsets ($ k > 2 $) from the generated
                 frequent 2-itemsets. Theoretical properties of {\em
                 Twain\/} are derived as well in this article. The
                 experimental results show that {\em Twain\/}
                 outperforms the prior works in the quality of frequent
                 patterns, execution time, I/O cost, CPU overhead and
                 scalability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Association; temporal",
}

%%% TKDD 1(3), article 9 (December 2007): guest editors' introduction
%%% to the ACM SIGKDD 2006 special issue; no abstract or keywords.
%%% The first author's surname is Bayardo; the citation tag keeps its
%%% earlier spelling so that existing citations of this entry still
%%% resolve.
%%% NOTE(review): the fifth author previously read ``Johannes
%%% Gunopulos'', repeating the fourth author's surname; it is given
%%% here as Johannes Gehrke -- confirm against the DOI landing page.
@Article{Bayardop:2007:ISI,
  author =       "Roberto Bayardo and Kristin P. Bennett and Gautam Das
                 and Dimitrios Gunopulos and Johannes Gehrke",
  title =        "Introduction to special issue {ACM SIGKDD 2006}",
  journal =      j-TKDD,
  volume =       "1",
  number =       "3",
  pages =        "9:1--9:??",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1297332.1297333",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:56 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD 1(3), article 10 (December 2007): robust information-theoretic
%%% clustering (RIC), based on the minimum description length
%%% principle.  The first author's surname is written with the accent
%%% as a braced control sequence, {\"o}; the citation tag uses plain
%%% ASCII.
@Article{Bohm:2007:RPF,
  author =       "Christian B{\"o}hm and Christos Faloutsos and Jia-Yu
                 Pan and Claudia Plant",
  title =        "{RIC}: {Parameter-free} noise-robust clustering",
  journal =      j-TKDD,
  volume =       "1",
  number =       "3",
  pages =        "10:1--10:??",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1297332.1297334",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:56 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "How do we find a {\em natural\/} clustering of a
                 real-world point set which contains an unknown number
                 of clusters with different shapes, and which may be
                 contaminated by noise? As most clustering algorithms
                 were designed with certain assumptions (Gaussianity),
                 they often require the user to give input parameters,
                 and are sensitive to noise. In this article, we propose
                 a robust framework for determining a natural clustering
                 of a given dataset, based on the minimum description
                 length (MDL) principle. The proposed framework, {\em
                 robust information-theoretic clustering (RIC)}, is
                 orthogonal to any known clustering algorithm: Given a
                 preliminary clustering, RIC purifies these clusters
                 from noise, and adjusts the clusterings such that it
                 simultaneously determines the most natural amount and
                 shape (subspace) of the clusters. Our RIC method can be
                 combined with any clustering technique ranging from
                 K-means and K-medoids to advanced methods such as
                 spectral clustering. In fact, RIC is even able to
                 purify and improve an initial coarse clustering, even
                 if we start with very simple methods. In an extension,
                 we propose a fully automatic stand-alone clustering
                 method and efficiency improvements. RIC scales well
                 with the dataset size. Extensive experiments on
                 synthetic and real-world datasets validate the proposed
                 RIC framework.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Clustering; data summarization; noise robustness;
                 parameter-free data mining",
}

%%% TKDD 1(3), article 11 (December 2007): generating semantic
%%% annotations for frequent patterns from a context model.  The
%%% abstract has two paragraphs, separated by \par and a blank line.
@Article{Mei:2007:SAF,
  author =       "Qiaozhu Mei and Dong Xin and Hong Cheng and Jiawei Han
                 and Chengxiang Zhai",
  title =        "Semantic annotation of frequent patterns",
  journal =      j-TKDD,
  volume =       "1",
  number =       "3",
  pages =        "11:1--11:??",
  month =        dec,
  year =         "2007",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1297332.1297335",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:58:56 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Using frequent patterns to analyze data has been one
                 of the fundamental approaches in many data mining
                 applications. Research in frequent pattern mining has
                 so far mostly focused on developing efficient
                 algorithms to discover various kinds of frequent
                 patterns, but little attention has been paid to the
                 important next step --- interpreting the discovered
                 frequent patterns. Although the compression and
                 summarization of frequent patterns has been studied in
                 some recent work, the proposed techniques there can
                 only annotate a frequent pattern with nonsemantical
                 information (e.g., support), which provides only
                 limited help for a user to understand the
                 patterns.\par

                 In this article, we study the novel problem of
                 generating semantic annotations for frequent patterns.
                 The goal is to discover the hidden meanings of a
                 frequent pattern by annotating it with in-depth,
                 concise, and structured information. We propose a
                 general approach to generate such an annotation for a
                 frequent pattern by constructing its context model,
                 selecting informative context indicators, and
                 extracting representative transactions and semantically
                 similar patterns. This general approach can well
                 incorporate the user's prior knowledge, and has
                 potentially many applications, such as generating a
                 dictionary-like description for a pattern, finding
                 synonym patterns, discovering semantic relations, and
                 summarizing semantic classes of a set of frequent
                 patterns. Experiments on different datasets show that
                 our approach is effective in generating semantic
                 pattern annotations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Frequent pattern; pattern annotation; pattern context;
                 pattern semantic analysis",
}

@article{Koren:2007:MEP,
  author          = {Yehuda Koren and Stephen C. North and Chris Volinsky},
  title           = {Measuring and extracting proximity graphs in
                     networks},
  journal         = j-TKDD,
  volume          = {1},
  number          = {3},
  pages           = {12:1--12:??},
  month           = dec,
  year            = {2007},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1297332.1297336},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:56 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Measuring distance or some other form of proximity
                     between objects is a standard data mining tool.
                     Connection subgraphs were recently proposed as a way to
                     demonstrate proximity between nodes in networks. We
                     propose a new way of measuring and extracting proximity
                     in networks called ``cycle-free effective conductance''
                     (CFEC). Importantly, the measured proximity is
                     accompanied with a {\em proximity subgraph\/} which
                     allows assessing and understanding measured values. Our
                     proximity calculation can handle more than two
                     endpoints, directed edges, is statistically well
                     behaved, and produces an effectiveness score for the
                     computed subgraphs. We provide an efficient algorithm
                     to measure and extract proximity. Also, we report
                     experimental results and show examples for four large
                     network datasets: a telecommunications calling graph,
                     the IMDB actors graph, an academic coauthorship
                     network, and a movie recommendation system.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {12},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Connection subgraph; cycle-free escape probability;
                     escape probability; graph mining; proximity; proximity
                     subgraph; random walk},
}

@article{Ihler:2007:LDE,
  author          = {Alexander Ihler and Jon Hutchins and Padhraic Smyth},
  title           = {Learning to detect events with {Markov}-modulated
                     {Poisson} processes},
  journal         = j-TKDD,
  volume          = {1},
  number          = {3},
  pages           = {13:1--13:??},
  month           = dec,
  year            = {2007},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1297332.1297337},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:56 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Time-series of count data occur in many different
                     contexts, including Internet navigation logs, freeway
                     traffic monitoring, and security logs associated with
                     buildings. In this article we describe a framework for
                     detecting anomalous events in such data using an
                     unsupervised learning approach. Normal periodic
                     behavior is modeled via a time-varying Poisson process
                     model, which in turn is modulated by a hidden Markov
                     process that accounts for bursty events. We outline a
                     Bayesian framework for learning the parameters of this
                     model from count time-series. Two large real-world
                     datasets of time-series counts are used as testbeds to
                     validate the approach, consisting of freeway traffic
                     data and logs of people entering and exiting a
                     building. We show that the proposed model is
                     significantly more accurate at detecting known events
                     than a more traditional threshold-based technique. We
                     also describe how the model can be used to investigate
                     different degrees of periodicity in the data, including
                     systematic day-of-week and time-of-day effects, and to
                     make inferences about different aspects of events such
                     as number of vehicles or people involved. The results
                     indicate that the Markov-modulated Poisson framework
                     provides a robust and accurate framework for adaptively
                     and autonomously learning how to separate unusual
                     bursty events from traces of normal human activity.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {13},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Event detection; Markov modulated; Poisson},
}

@article{Gionis:2007:ADM,
  author          = {Aristides Gionis and Heikki Mannila and Taneli
                     Mielik{\"a}inen and Panayiotis Tsaparas},
  title           = {Assessing data mining results via swap randomization},
  journal         = j-TKDD,
  volume          = {1},
  number          = {3},
  pages           = {14:1--14:??},
  month           = dec,
  year            = {2007},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1297332.1297338},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:58:56 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {The problem of assessing the significance of data
                     mining results on high-dimensional 0--1 datasets has
                     been studied extensively in the literature. For
                     problems such as mining frequent sets and finding
                     correlations, significance testing can be done by
                     standard statistical tests such as chi-square, or other
                     methods. However, the results of such tests depend only
                     on the specific attributes and not on the dataset as a
                     whole. Moreover, the tests are difficult to apply to
                     sets of patterns or other complex results of data
                     mining algorithms. In this article, we consider a
                     simple randomization technique that deals with this
                     shortcoming. The approach consists of producing random
                     datasets that have the same row and column margins as
                     the given dataset, computing the results of interest on
                     the randomized instances and comparing them to the
                     results on the actual data. This randomization
                     technique can be used to assess the results of many
                     different types of data mining algorithms, such as
                     frequent sets, clustering, and spectral analysis. To
                     generate random datasets with given margins, we use
                     variations of a Markov chain approach which is based on
                     a simple swap operation. We give theoretical results on
                     the efficiency of different randomization methods, and
                     apply the swap randomization method to several
                     well-known datasets. Our results indicate that for some
                     datasets the structure discovered by the data mining
                     algorithms is expected, given the row and column
                     margins of the datasets, while for other datasets the
                     discovered structure conveys information that is not
                     captured by the margin counts.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {14},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  keywords        = {0--1 data; randomization tests; Significance testing;
                     swaps},
}

@article{Tang:2008:TTA,
  author          = {Lei Tang and Huan Liu and Jianping Zhang and Nitin
                     Agarwal and John J. Salerno},
  title           = {Topic taxonomy adaptation for group profiling},
  journal         = j-TKDD,
  volume          = {1},
  number          = {4},
  pages           = {1:1--1:??},
  month           = jan,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1324172.1324173},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:07 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {A topic taxonomy is an effective representation that
                     describes salient features of virtual groups or online
                     communities. A topic taxonomy consists of topic nodes.
                     Each internal node is defined by its vertical path
                     (i.e., ancestor and child nodes) and its horizontal
                     list of attributes (or terms). In a text-dominant
                     environment, a topic taxonomy can be used to flexibly
                     describe a group's interests with varying granularity.
                     However, the stagnant nature of a taxonomy may fail to
                     timely capture the dynamic change of a group's
                     interest. This article addresses the problem of how to
                     adapt a topic taxonomy to the accumulated data that
                     reflects the change of a group's interest to achieve
                     dynamic group profiling. We first discuss the issues
                     related to topic taxonomy. We next formulate taxonomy
                     adaptation as an optimization problem to find the
                     taxonomy that best fits the data. We then present a
                     viable algorithm that can efficiently accomplish
                     taxonomy adaptation. We conduct extensive experiments
                     to evaluate our approach's efficacy for group
                     profiling, compare the approach with some alternatives,
                     and study its performance for dynamic group profiling.
                     While pointing out various applications of taxonomy
                     adaption, we suggest some future work that can take
                     advantage of burgeoning Web 2.0 services for online
                     targeted marketing, counterterrorism in connecting
                     dots, and community tracking.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {1},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  keywords        = {dynamic profiling; group interest; taxonomy
                     adjustment; text hierarchical classification; Topic
                     taxonomy},
}

@Article{Cormode:2008:FHH,
  author =       "Graham Cormode and Flip Korn and S. Muthukrishnan and
                 Divesh Srivastava",
  title =        "Finding hierarchical heavy hitters in streaming data",
  journal =      j-TKDD,
  volume =       "1",
  number =       "4",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324172.1324174",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:07 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Data items that arrive online as streams typically
                 have attributes which take values from one or more
                 hierarchies (time and geographic location, source and
                 destination IP addresses, etc.). Providing an aggregate
                 view of such data is important for summarization,
                 visualization, and analysis. We develop an aggregate
                 view based on certain organized sets of large-valued
                 regions (``heavy hitters'') corresponding to
                 hierarchically discounted frequency counts. We formally
                 define the notion of {\em hierarchical heavy hitters\/}
                 (HHHs). We first consider computing (approximate) HHHs
                 over a data stream drawn from a single hierarchical
                 attribute. We formalize the problem and give
                 deterministic algorithms to find them in a single pass
                 over the input.\par

                 In order to analyze a wider range of realistic data
                 streams (e.g., from IP traffic-monitoring
                 applications), we generalize this problem to multiple
                 dimensions. Here, the semantics of HHHs are more
                 complex, since a ``child'' node can have multiple
                 ``parent'' nodes. We present online algorithms that
                 find approximate HHHs in one pass, with provable
                 accuracy guarantees. The product of hierarchical
                 dimensions forms a mathematical lattice structure. Our
                 algorithms exploit this structure, and so are able to
                 track approximate HHHs using only a small, fixed number
                 of statistics per stored item, regardless of the number
                 of dimensions.\par

                 We show experimentally, using real data, that our
                 proposed algorithms yield outputs which are very
                 similar (virtually identical, in many cases) to offline
                 computations of the exact solutions, whereas
                 straightforward heavy-hitters-based approaches give
                 significantly inferior answer quality. Furthermore, the
                 proposed algorithms result in an order of magnitude
                 savings in data structure size while performing
                 competitively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "approximation algorithms; Data mining; network data
                 analysis",
}

@article{Somaiya:2008:LCU,
  author          = {Manas Somaiya and Christopher Jermaine and Sanjay
                     Ranka},
  title           = {Learning correlations using the mixture-of-subsets
                     model},
  journal         = j-TKDD,
  volume          = {1},
  number          = {4},
  pages           = {3:1--3:??},
  month           = jan,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1324172.1324175},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:07 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Using a mixture of random variables to model data is a
                     tried-and-tested method common in data mining, machine
                     learning, and statistics. By using mixture modeling it
                     is often possible to accurately model even complex,
                     multimodal data via very simple components. However,
                     the classical mixture model assumes that a data point
                     is generated by a single component in the model. A lot
                     of datasets can be modeled closer to the underlying
                     reality if we drop this restriction. We propose a
                     probabilistic framework, the {\em mixture-of-subsets
                     (MOS) model}, by making two fundamental changes to the
                     classical mixture model. First, we allow a data point
                     to be generated by a set of components, rather than
                     just a single component. Next, we limit the number of
                     data attributes that each component can influence. We
                     also propose an EM framework to learn the MOS model
                     from a dataset, and experimentally evaluate it on real,
                     high-dimensional datasets. Our results show that the
                     MOS model learned from the data represents the
                     underlying nature of the data accurately.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {3},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  keywords        = {EM algorithm; high-dimensional data; Mixture
                     modeling},
}

@Article{Halkidi:2008:CFB,
  author =       "M. Halkidi and D. Gunopulos and M. Vazirgiannis and N.
                 Kumar and C. Domeniconi",
  title =        "A clustering framework based on subjective and
                 objective validity criteria",
  journal =      j-TKDD,
  volume =       "1",
  number =       "4",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1324172.1324176",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:07 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Clustering, as an unsupervised learning process, is a
                 challenging problem, especially in cases of
                 high-dimensional datasets. Clustering result quality
                 can benefit from user constraints and objective
                 validity assessment. In this article, we propose a
                 semisupervised framework for learning the weighted
                 Euclidean subspace, where the best clustering can be
                 achieved. Our approach capitalizes on: (i) user
                 constraints; and (ii) the quality of intermediate
                 clustering results in terms of their structural
                 properties. The proposed framework uses the clustering
                 algorithm and the validity measure as its parameters.
                 We develop and discuss algorithms for learning and
                 tuning the weights of contributing dimensions and
                 defining the ``best'' clustering obtained by satisfying
                 user constraints. Experimental results on benchmark
                 datasets demonstrate the superiority of the proposed
                 approach in terms of improved clustering accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "cluster validity; data mining; Semisupervised
                 learning; similarity measure learning; space learning",
}

@article{Zaki:2008:ISI,
  author          = {Mohammed J. Zaki and George Karypis and Jiong Yang and
                     Wei Wang},
  title           = {Introduction to special issue on bioinformatics},
  journal         = j-TKDD,
  volume          = {2},
  number          = {1},
  pages           = {1:1--1:??},
  month           = mar,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1342320.1342321},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:18 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {1},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Jin:2008:CMM,
  author          = {Ying Jin and T. M. Murali and Naren Ramakrishnan},
  title           = {Compositional mining of multirelational biological
                     datasets},
  journal         = j-TKDD,
  volume          = {2},
  number          = {1},
  pages           = {2:1--2:??},
  month           = mar,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1342320.1342322},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:18 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {High-throughput biological screens are yielding
                     ever-growing streams of information about multiple
                     aspects of cellular activity. As more and more
                     categories of datasets come online, there is a
                     corresponding multitude of ways in which inferences can
                     be chained across them, motivating the need for
                     compositional data mining algorithms. In this article,
                     we argue that such compositional data mining can be
                     effectively realized by functionally cascading
                     redescription mining and biclustering algorithms as
                     primitives. Both these primitives mirror shifts of
                     vocabulary that can be composed in arbitrary ways to
                     create rich chains of inferences. Given a relational
                     database and its schema, we show how the schema can be
                     automatically compiled into a compositional data mining
                     program, and how different domains in the schema can be
                     related through logical sequences of biclustering and
                     redescription invocations. This feature allows us to
                     rapidly prototype new data mining applications,
                     yielding greater understanding of scientific datasets.
                     We describe two applications of compositional data
                     mining: (i) matching terms across categories of the
                     Gene Ontology and (ii) understanding the molecular
                     mechanisms underlying stress response in human cells.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {2},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Biclustering; bioinformatics; compositional data
                     mining; inductive logic programming; redescription
                     mining},
}

@article{Sahay:2008:DSB,
  author          = {Saurav Sahay and Sougata Mukherjea and Eugene
                     Agichtein and Ernest V. Garcia and Shamkant B. Navathe
                     and Ashwin Ram},
  title           = {Discovering semantic biomedical relations utilizing
                     the {Web}},
  journal         = j-TKDD,
  volume          = {2},
  number          = {1},
  pages           = {3:1--3:??},
  month           = mar,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1342320.1342323},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:18 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {To realize the vision of a Semantic Web for Life
                     Sciences, discovering relations between resources is
                     essential. It is very difficult to automatically
                     extract relations from Web pages expressed in natural
                     language formats. On the other hand, because of the
                     explosive growth of information, it is difficult to
                     manually extract the relations. In this paper we
                     present techniques to automatically discover relations
                     between biomedical resources from the Web. For this
                     purpose we retrieve relevant information from Web
                     Search engines and Pubmed database using various
                     lexico-syntactic patterns as queries over SOAP web
                     services. The patterns are initially handcrafted but
                     can be progressively learnt. The extracted relations
                     can be used to construct and augment ontologies and
                     knowledge bases. Experiments are presented for general
                     biomedical relation discovery and domain specific
                     search to show the usefulness of our technique.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {3},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  keywords        = {Ontology construction; relation identification},
}

@article{Ye:2008:DSA,
  author          = {Jieping Ye and Jianhui Chen and Ravi Janardan and
                     Sudhir Kumar},
  title           = {Developmental stage annotation of \bioname{Drosophila}
                     gene expression pattern images via an entire solution
                     path for {LDA}},
  journal         = j-TKDD,
  volume          = {2},
  number          = {1},
  pages           = {4:1--4:??},
  month           = mar,
  year            = {2008},
  coden           = {????},
  doi             = {https://doi.org/10.1145/1342320.1342324},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  bibdate         = {Fri Apr 24 17:59:18 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/string-matching.bib;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Gene expression in a developing embryo occurs in
                     particular cells (spatial patterns) in a time-specific
                     manner (temporal patterns), which leads to the
                     differentiation of cell fates. Images of a
                     \bioname{Drosophila melanogaster} embryo at a given
                     developmental stage, showing a particular gene
                     expression pattern revealed by a gene-specific probe,
                     can be compared for spatial overlaps. The comparison is
                     fundamentally important to formulating and testing gene
                     interaction hypotheses. Expression pattern comparison
                     is most biologically meaningful when images from a
                     similar time point (developmental stage) are compared.
                     In this paper, we present LdaPath, a novel formulation
                     of Linear Discriminant Analysis (LDA) for automatic
                     developmental stage range classification. It employs
                     multivariate linear regression with the {$ L_1 $}-norm
                     penalty controlled by a regularization parameter for
                     feature extraction and visualization. LdaPath computes
                     an entire solution path for all values of
                     regularization parameter with essentially the same
                     computational cost as fitting one LDA model. Thus, it
                     facilitates efficient model selection. It is based on
                     the equivalence relationship between LDA and the least
                     squares method for multiclass classifications. This
                     equivalence relationship is established under a mild
                     condition, which we show empirically to hold for many
                     high-dimensional datasets, such as expression pattern
                     images. Our experiments on a collection of 2705
                     expression pattern images show the effectiveness of the
                     proposed algorithm. Results also show that the LDA
                     model resulting from LdaPath is sparse, and irrelevant
                     features may be removed. Thus, LdaPath provides a
                     general framework for simultaneous feature selection
                     and feature extraction.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {4},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  keywords        = {dimensionality reduction; Gene expression pattern
                     image; linear discriminant analysis; linear
                     regression},
}

@Article{Lu:2008:ADA,
  author =       "Yijuan Lu and Qi Tian and Jennifer Neary and Feng Liu
                 and Yufeng Wang",
  title =        "Adaptive discriminant analysis for microarray-based
                 classification",
  journal =      j-TKDD,
  volume =       "2",
  number =       "1",
  pages =        "5:1--5:??",
  month =        mar,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1342320.1342325",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:18 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Microarray technology has generated enormous amounts
                 of high-dimensional gene expression data, providing a
                 unique platform for exploring gene regulatory networks.
                 However, the curse of dimensionality plagues effort to
                 analyze these high throughput data. Linear Discriminant
                 Analysis (LDA) and Biased Discriminant Analysis (BDA)
                 are two popular techniques for dimension reduction,
                 which pay attention to different roles of the positive
                 and negative samples in finding discriminating
                 subspace. However, the drawbacks of these two methods
                 are obvious: LDA has limited efficiency in classifying
                 sample data from subclasses with different
                 distributions, and BDA does not account for the
                 underlying distribution of negative samples.\par

                 In this paper, we propose a novel dimension reduction
                 technique for microarray analysis: Adaptive
                 Discriminant Analysis (ADA), which effectively exploits
                 favorable attributes of both BDA and LDA and avoids
                 their unfavorable ones. ADA can find a good
                 discriminative subspace with adaptation to different
                 sample distributions. It not only alleviates the
                 problem of high dimensionality, but also enhances the
                 classification performance in the subspace with
                 na{\"\i}ve Bayes classifier. To learn the best model
                 fitting the real scenario, boosted Adaptive
                 Discriminant Analysis is further proposed. Extensive
                 experiments on the yeast cell cycle regulation data
                 set, and the expression data of the red blood cell
                 cycle in malaria parasite \bioname{Plasmodium falciparum}
                 demonstrate the superior performance of ADA and boosted
                 ADA. We also present some putative genes of specific
                 functional classes predicted by boosted ADA. Their
                 potential functionality is confirmed by independent
                 predictions based on Gene Ontology, demonstrating that
                 ADA and boosted ADA are effective dimension reduction
                 methods for microarray-based classification.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "ADA; BDA; boosted ADA; dimension reduction; LDA;
                 microarray",
}

@Article{Hashimoto:2008:NEP,
  author =       "Kosuke Hashimoto and Kiyoko Flora Aoki-Kinoshita and
                 Nobuhisa Ueda and Minoru Kanehisa and Hiroshi
                 Mamitsuka",
  title =        "A new efficient probabilistic model for mining labeled
                 ordered trees applied to glycobiology",
  journal =      j-TKDD,
  volume =       "2",
  number =       "1",
  pages =        "6:1--6:??",
  month =        mar,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1342320.1342326",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:18 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Mining frequent patterns from large datasets is an
                 important issue in data mining. Recently, complex and
                 unstructured (or semi-structured) datasets have
                 appeared as targets for major data mining applications,
                 including text mining, web mining and bioinformatics.
                 Our work focuses on labeled ordered trees, which are
                 typically semi-structured datasets. In bioinformatics,
                 carbohydrate sugar chains, or glycans, can be modeled
                 as labeled ordered trees. Glycans are the third major
                 class of biomolecules, having important roles in
                 signaling and recognition. For mining labeled ordered
                 trees, we propose a new probabilistic model and its
                 efficient learning scheme which significantly improves
                 the time and space complexity of an existing
                 probabilistic model for labeled ordered trees. We
                 evaluated the performance of the proposed model,
                 comparing it with those of other probabilistic models,
                 using synthetic as well as real datasets from
                 glycobiology. Experimental results showed that the
                 proposed model drastically reduced the computation time
                 of the competing model, keeping the predictive power
                 and avoiding overfitting to the training data. Finally,
                 we assessed our results on real data from a variety of
                 biological viewpoints, verifying known facts in
                 glycobiology.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Expectation-maximization; labeled ordered trees;
                 maximum likelihood; probabilistic models",
}

@Article{Ge:2008:JCA,
  author =       "Rong Ge and Martin Ester and Byron J. Gao and Zengjian
                 Hu and Binay Bhattacharya and Boaz Ben-Moshe",
  title =        "Joint cluster analysis of attribute data and
                 relationship data: {The} connected $k$-center problem,
                 algorithms and applications",
  journal =      j-TKDD,
  volume =       "2",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1376815.1376816",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:30 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Attribute data and relationship data are two principal
                 types of data, representing the intrinsic and extrinsic
                 properties of entities. While attribute data have been
                 the main source of data for cluster analysis,
                 relationship data such as social networks or metabolic
                 networks are becoming increasingly available. It is
                 also common to observe both data types carry
                 complementary information such as in market
                 segmentation and community identification, which calls
                 for a joint cluster analysis of both data types so as
                 to achieve better results. In this article, we
                 introduce the novel Connected $k$-Center ({\em CkC\/})
                 problem, a clustering model taking into account
                 attribute data as well as relationship data. We analyze
                 the complexity of the problem and prove its
                 NP-hardness. Therefore, we analyze the approximability
                 of the problem and also present a constant factor
                 approximation algorithm. For the special case of the
                 {\em CkC\/} problem where the relationship data form a
                 tree structure, we propose a dynamic programming method
                 giving an optimal solution in polynomial time. We
                 further present NetScan, a heuristic algorithm that is
                 efficient and effective for large real databases. Our
                 extensive experimental evaluation on real datasets
                 demonstrates the meaningfulness and accuracy of the
                 NetScan results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "approximation algorithms; Attribute data; community
                 identification; document clustering; joint cluster
                 analysis; market segmentation; NP-hardness;
                 relationship data",
}

@Article{Gupta:2008:BBC,
  author =       "Gunjan Gupta and Joydeep Ghosh",
  title =        "{Bregman} bubble clustering: a robust framework for
                 mining dense clusters",
  journal =      j-TKDD,
  volume =       "2",
  number =       "2",
  pages =        "8:1--8:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1376815.1376817",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:30 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In classical clustering, each data point is assigned
                 to at least one cluster. However, in many applications
                 only a small subset of the available data is relevant
                 for the problem and the rest needs to be ignored in
                 order to obtain good clusters. Certain nonparametric
                 density-based clustering methods find the most relevant
                 data as multiple dense regions, but such methods are
                 generally limited to low-dimensional data and do not
                 scale well to large, high-dimensional datasets. Also,
                 they use a specific notion of ``distance'', typically
                 Euclidean or Mahalanobis distance, which further limits
                 their applicability. On the other hand, the recent One
                 Class Information Bottleneck (OC-IB) method is fast and
                 works on a large class of distortion measures known as
                 Bregman Divergences, but can only find a {\em single\/}
                 dense region. This article presents a broad framework
                 for finding $k$ dense clusters while ignoring the rest
                 of the data. It includes a seeding algorithm that can
                 automatically determine a suitable value for $k$.
                 When $k$ is forced to 1, our method gives rise to an
                 improved version of OC-IB with optimality guarantees.
                 We provide a generative model that yields the proposed
                 iterative algorithm for finding $k$ dense regions as a
                 special case. Our analysis reveals an interesting and
                 novel connection between the problem of finding dense
                 regions and exponential mixture models; a hard model
                 corresponding to $k$ exponential mixtures with a
                 uniform background results in a set of $k$ dense
                 clusters. The proposed method describes a highly
                 scalable algorithm for finding multiple dense regions
                 that works with any Bregman Divergence, thus extending
                 density based clustering to a variety of non-Euclidean
                 problems not addressable by earlier methods. We present
                 empirical results on three artificial, two microarray
                 and one text dataset to show the relevance and
                 effectiveness of our methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Bregman divergences; Density-based clustering;
                 expectation maximization; exponential family; One Class
                 classification",
}

@Article{Tan:2008:TMG,
  author =       "Henry Tan and Fedja Hadzic and Tharam S. Dillon and
                 Elizabeth Chang and Ling Feng",
  title =        "Tree model guided candidate generation for mining
                 frequent subtrees from {XML} documents",
  journal =      j-TKDD,
  volume =       "2",
  number =       "2",
  pages =        "9:1--9:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1376815.1376818",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:30 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Due to the inherent flexibilities in both structure
                 and semantics, XML association rules mining faces few
                 challenges, such as: a more complicated hierarchical
                 data structure and ordered data context. Mining
                 frequent patterns from XML documents can be recast as
                 mining frequent tree structures from a database of XML
                 documents. In this study, we model a database of XML
                 documents as a database of rooted labeled ordered
                 subtrees. In particular, we are mainly concerned with
                 mining frequent induced and embedded ordered subtrees.
                 Our main contributions are as follows. We describe our
                 unique {\em embedding list\/} representation of the
                 tree structure, which enables efficient implementation
                 of our {\em Tree Model Guided\/} ({\em TMG\/})
                 candidate generation. {\em TMG\/} is an optimal,
                 nonredundant enumeration strategy that enumerates all
                 the valid candidates that conform to the structural
                 aspects of the data. We show through a mathematical
                 model and experiments that {\em TMG\/} has better
                 complexity compared to the commonly used join approach.
                 In this article, we propose two algorithms, MB3-Miner
                 and iMB3-Miner. MB3-Miner mines embedded subtrees.
                 iMB3-Miner mines induced and/or embedded subtrees by
                 using the {\em maximum level of embedding constraint}.
                 Our experiments with both synthetic and real datasets
                 against two well-known algorithms for mining induced
                 and embedded subtrees, demonstrate the effectiveness
                 and the efficiency of the proposed techniques.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "FREQT; TMG; Tree mining; tree model guided;
                 TreeMiner",
}

@Article{Islam:2008:STS,
  author =       "Aminul Islam and Diana Inkpen",
  title =        "Semantic text similarity using corpus-based word
                 similarity and string similarity",
  journal =      j-TKDD,
  volume =       "2",
  number =       "2",
  pages =        "10:1--10:??",
  month =        jul,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1376815.1376819",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:30 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We present a method for measuring the semantic
                 similarity of texts using a corpus-based measure of
                 semantic word similarity and a normalized and modified
                 version of the Longest Common Subsequence (LCS) string
                 matching algorithm. Existing methods for computing text
                 similarity have focused mainly on either large
                 documents or individual words. We focus on computing
                 the similarity between two sentences or two short
                 paragraphs. The proposed method can be exploited in a
                 variety of applications involving textual knowledge
                 representation and knowledge discovery. Evaluation
                 results on two different data sets show that our method
                 outperforms several competing methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "corpus-based measures; Semantic similarity of words;
                 similarity of short texts",
}

@Article{Sun:2008:ITA,
  author =       "Jimeng Sun and Dacheng Tao and Spiros Papadimitriou
                 and Philip S. Yu and Christos Faloutsos",
  title =        "Incremental tensor analysis: {Theory} and
                 applications",
  journal =      j-TKDD,
  volume =       "2",
  number =       "3",
  pages =        "11:1--11:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1409620.1409621",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:41 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "How do we find patterns in author-keyword
                 associations, evolving over time? Or in data cubes
                 (tensors), with product-branch-customer sales
                 information? And more generally, how to summarize
                 high-order data cubes (tensors)? How to incrementally
                 update these patterns over time? Matrix decompositions,
                 like principal component analysis (PCA) and variants,
                 are invaluable tools for mining, dimensionality
                 reduction, feature selection, rule identification in
                 numerous settings like streaming data, text, graphs,
                 social networks, and many more settings. However, they
                 have only two orders (i.e., matrices, like author and
                 keyword in the previous example).\par

                 We propose to envision such higher-order data as
                 tensors, and tap the vast literature on the topic.
                 However, these methods do not necessarily scale up, let
                 alone operate on semi-infinite streams. Thus, we
                 introduce a general framework, incremental tensor
                 analysis (ITA), which efficiently computes a compact
                 summary for high-order and high-dimensional data, and
                 also reveals the hidden correlations. Three variants of
                 ITA are presented: (1) dynamic tensor analysis (DTA);
                 (2) streaming tensor analysis (STA); and (3)
                 window-based tensor analysis (WTA). In particular, we
                 explore several fundamental design trade-offs such as
                 space efficiency, computational cost, approximation
                 accuracy, time dependency, and model complexity.\par

                 We implement all our methods and apply them in several
                 real settings, such as network anomaly detection,
                 multiway latent semantic indexing on citation networks,
                 and correlation study on sensor measurements. Our
                 empirical studies show that the proposed methods are
                 fast and accurate and that they find interesting
                 patterns and outliers on the real datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "multilinear algebra; stream mining; Tensor",
}

@Article{Mangasarian:2008:PPC,
  author =       "Olvi L. Mangasarian and Edward W. Wild and Glenn M.
                 Fung",
  title =        "Privacy-preserving classification of vertically
                 partitioned data via random kernels",
  journal =      j-TKDD,
  volume =       "2",
  number =       "3",
  pages =        "12:1--12:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1409620.1409622",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:41 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We propose a novel privacy-preserving support vector
                 machine (SVM) classifier for a data matrix $A$ whose
                 input feature columns are divided into groups belonging
                 to different entities. Each entity is unwilling to
                 share its group of columns or make it public. Our
                 classifier is based on the concept of a reduced kernel
                 $ k(A, B^\prime)$, where $ B^\prime $ is the transpose
                 of a random matrix $B$. The column blocks of $B$
                 corresponding to the different entities are privately
                 generated by each entity and never made public. The
                 proposed linear or nonlinear SVM classifier, which is
                 public but does not reveal any of the privately held
                 data, has accuracy comparable to that of an ordinary
                 SVM classifier that uses the entire set of input
                 features directly.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Privacy preserving classification; support vector
                 machines; vertically partitioned data",
}

@Article{Lakshmanan:2008:DRA,
  author =       "Laks V. S. Lakshmanan and Raymond T. Ng and Ganesh
                 Ramesh",
  title =        "On disclosure risk analysis of anonymized itemsets in
                 the presence of prior knowledge",
  journal =      j-TKDD,
  volume =       "2",
  number =       "3",
  pages =        "13:1--13:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1409620.1409623",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:41 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Decision makers of companies often face the dilemma of
                 whether to release data for knowledge discovery,
                 vis-a-vis the risk of disclosing proprietary or
                 sensitive information. Among the various methods
                 employed for ``sanitizing'' the data prior to
                 disclosure, we focus in this article on anonymization,
                 given its widespread use in practice. We do due
                 diligence to the question ``just how safe is the
                 anonymized data?'' We consider both those scenarios
                 when the hacker has no information and, more
                 realistically, when the hacker may have partial
                 information about items in the domain. We conduct our
                 analyses in the context of frequent set mining and
                 address the safety question at two different levels:
                 (i) how likely of being cracked (i.e., re-identified by
                 a hacker), are the identities of individual items and
                 (ii) how likely are sets of items cracked? For
                 capturing the prior knowledge of the hacker, we propose
                 a {\em belief function}, which amounts to an educated
                 guess of the frequency of each item. For various
                 classes of belief functions which correspond to
                 different degrees of prior knowledge, we derive
                 formulas for computing the expected number of cracks of
                 single items and for itemsets, the probability of
                 cracking the itemsets. While obtaining exact values
                 for more general situations is computationally hard, we
                 propose a series of heuristics called the {\em
                 O-estimates}. They are easy to compute and are shown
                 fairly accurate, justified by empirical results on real
                 benchmark datasets. Based on the O-estimates, we
                 propose a recipe for the decision makers to resolve
                 their dilemma. Our recipe operates at two different
                 levels, depending on whether the data owner wants to
                 reason in terms of single items or sets of items (or
                 both). Finally, we present techniques for ascertaining
                 a hacker's knowledge of correlation in terms of
                 co-occurrence of items likely. This information
                 regarding the hacker's knowledge can be incorporated
                 into our framework of disclosure risk analysis and we
                 present experimental results demonstrating how this
                 knowledge affects the heuristic estimates we have
                 developed.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "anonymization; belief function; bipartite graphs;
                 correlation; Disclosure risk; frequent itemsets;
                 hacker; matching; prior knowledge; sampling",
}

@Article{Vaidya:2008:PPD,
  author =       "Jaideep Vaidya and Chris Clifton and Murat
                 Kantarcioglu and A. Scott Patterson",
  title =        "Privacy-preserving decision trees over vertically
                 partitioned data",
  journal =      j-TKDD,
  volume =       "2",
  number =       "3",
  pages =        "14:1--14:??",
  month =        oct,
  year =         "2008",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1409620.1409624",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:41 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Privacy and security concerns can prevent sharing of
                 data, derailing data-mining projects. Distributed
                 knowledge discovery, if done correctly, can alleviate
                 this problem. We introduce a generalized
                 privacy-preserving variant of the ID3 algorithm for
                 vertically partitioned data distributed over two or
                 more parties. Along with a proof of security, we
                 discuss what would be necessary to make the protocols
                 completely secure. We also provide experimental
                 results, giving a first demonstration of the practical
                 complexity of secure multiparty computation-based data
                 mining.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Decision tree classification; privacy",
}

@Article{Chuang:2009:FPS,
  author =       "Kun-Ta Chuang and Hung-Leng Chen and Ming-Syan Chen",
  title =        "Feature-preserved sampling over streaming data",
  journal =      j-TKDD,
  volume =       "2",
  number =       "4",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1460797.1460798",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:51 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In this article, we explore a novel sampling model,
                 called {\em feature preserved sampling\/} ({\em FPS\/})
                 that sequentially generates a high-quality sample over
                 sliding windows. The sampling quality we consider
                 refers to the degree of consistency between the sample
                 proportion and the population proportion of each
                 attribute value in a window. Due to the time-variant
                 nature of real-world datasets, users are more likely to
                 be interested in the most recent data. However,
                 previous works have not been able to generate a
                 high-quality sample over sliding windows that precisely
                 preserves up-to-date population characteristics.
                 Motivated by this shortcoming, we have developed the
                 {\em FPS\/} algorithm, which has several advantages:
                 (1) it sequentially generates a sample from a
                 time-variant data source over sliding windows; (2) the
                 execution time of {\em FPS\/} is linear with respect to
                 the database size; (3) the {\em relative\/}
                 proportional differences between the sample proportions
                 and population proportions of most distinct attribute
                 values are guaranteed to be below a specified error
                 threshold, $ \epsilon $, while the {\em relative\/}
                 proportion differences of the remaining attribute
                 values are as close to $ \epsilon $ as possible, which
                 ensures that the generated sample is of high quality;
                 (4) the sample rate is close to the user specified rate
                 so that a high quality sampling result can be obtained
                 without increasing the sample size; (5) by a thorough
                 analytical and empirical study, we prove that {\em
                 FPS\/} has acceptable space overheads, especially when
                 the attribute values have Zipfian distributions, and
                 {\em FPS\/} can also excellently preserve the
                 population proportion of multivariate features in the
                 sample; and (6) {\em FPS\/} can be applied to infinite
                 streams and finite datasets equally, and the generated
                 samples can be used for various applications. Our
                 experiments on both real and synthetic data validate
                 that {\em FPS\/} can effectively obtain a high quality
                 sample of the desired size. In addition, while using
                 the sample generated by {\em FPS\/} in various mining
                 applications, a significant improvement in efficiency
                 can be achieved without compromising the model's
                 precision.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "sampling; Streaming mining",
}

@Article{Jiang:2009:MFC,
  author =       "Daxin Jiang and Jian Pei",
  title =        "Mining frequent cross-graph quasi-cliques",
  journal =      j-TKDD,
  volume =       "2",
  number =       "4",
  pages =        "16:1--16:??",
  month =        jan,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1460797.1460799",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 17:59:51 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Joint mining of multiple datasets can often discover
                 interesting, novel, and reliable patterns which cannot
                 be obtained solely from any single source. For example,
                 in bioinformatics, jointly mining multiple gene
                 expression datasets obtained by different labs or
                 during various biological processes may overcome the
                 heavy noise in the data. Moreover, by joint mining of
                 gene expression data and protein-protein interaction
                 data, we may discover clusters of genes which show
                 coherent expression patterns and also produce
                 interacting proteins. Such clusters may be potential
                 pathways.\par

                 In this article, we investigate a novel data mining
                 problem, {\em mining frequent cross-graph
                 quasi-cliques}, which is generalized from several
                 interesting applications in bioinformatics,
                 cross-market customer segmentation, social network
                 analysis, and Web mining. In a graph, a set of vertices
                 $S$ is a $ \gamma $-quasi-clique $ (0 < \gamma \leq 1)$
                 if each vertex $v$ in $S$ directly connects to at least
                 $ \gamma \cdot (|S| - 1)$ other vertices in $S$. Given
                 a set of graphs $ G_1, \ldots {}, G_n$ and parameter $
                 {\rm min \_ sup} (0 < {\rm min \_ sup} \leq 1)$, a set of
                 vertices $S$ is a frequent cross-graph quasi-clique if
                 $S$ is a $ \gamma $-quasi-clique in at least $ {\rm min
                 \_ sup} \cdot n$ graphs, and there does not exist a
                 proper superset of $S$ having the property.\par

                 We build a general model, show why the complete set of
                 frequent cross-graph quasi-cliques cannot be found by
                 previous data mining methods, and study the complexity
                 of the problem. While the problem is difficult, we
                 develop practical algorithms which exploit several
                 interesting and effective techniques and heuristics to
                 efficaciously mine frequent cross-graph quasi-cliques.
                 A systematic performance study is reported on both
                 synthetic and real data sets. We demonstrate some
                 interesting and meaningful frequent cross-graph
                 quasi-cliques in bioinformatics. The experimental
                 results also show that our algorithms are efficient and
                 scalable.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "bioinformatics; clique; Graph mining; joint mining",
}

@article{Domeniconi:2009:WCE,
  author          = {Carlotta Domeniconi and Muna Al-Razgan},
  title           = {Weighted cluster ensembles: {Methods} and analysis},
  journal         = j-TKDD,
  year            = {2009},
  month           = jan,
  volume          = {2},
  number          = {4},
  articleno       = {17},
  pages           = {17:1--17:??},
  doi             = {https://doi.org/10.1145/1460797.1460800},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {accuracy and diversity measures; Cluster ensembles;
                     consensus functions; data mining; subspace clustering;
                     text data},
  abstract        = {Cluster ensembles offer a solution to challenges
                     inherent to clustering arising from its ill-posed
                     nature. Cluster ensembles can provide robust and stable
                     solutions by leveraging the consensus across multiple
                     clustering results, while averaging out emergent
                     spurious structures that arise due to the various
                     biases to which each participating algorithm is tuned.
                     In this article, we address the problem of combining
                     multiple {\em weighted clusters\/} that belong to
                     different subspaces of the input space. We leverage the
                     diversity of the input clusterings in order to generate
                     a consensus partition that is superior to the
                     participating ones. Since we are dealing with weighted
                     clusters, our consensus functions make use of the
                     weight vectors associated with the clusters. We
                     demonstrate the effectiveness of our techniques by
                     running experiments with several real datasets,
                     including high-dimensional text data. Furthermore, we
                     investigate in depth the issue of diversity and
                     accuracy for our ensemble methods. Our analysis and
                     experimental results show that the proposed techniques
                     are capable of producing a partition that is as good as
                     or better than the best individual clustering.},
  bibdate         = {Fri Apr 24 17:59:51 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhang:2009:DGA,
  author          = {Zhenjie Zhang and Laks V. S. Lakshmanan and Anthony K.
                     H. Tung},
  title           = {On domination game analysis for microeconomic data
                     mining},
  journal         = j-TKDD,
  year            = {2009},
  month           = jan,
  volume          = {2},
  number          = {4},
  articleno       = {18},
  pages           = {18:1--18:??},
  doi             = {https://doi.org/10.1145/1460797.1460801},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {data mining; Domination game; game theory},
  abstract        = {Game theory is a powerful tool for analyzing the
                     competitions among manufacturers in a market. In this
                     article, we present a study on combining game theory
                     and data mining by introducing the concept of
                     domination game analysis. We present a multidimensional
                     market model, where every dimension represents one
                     attribute of a commodity. Every product or customer is
                     represented by a point in the multidimensional space,
                     and a product is said to ``dominate'' a customer if all
                     of its attributes can satisfy the requirements of the
                     customer. The expected market share of a product is
                     measured by the expected number of the buyers in the
                     customers, all of which are equally likely to buy any
                     product dominating him. A Nash equilibrium is a
                     configuration of the products achieving stable expected
                     market shares for all products. We prove that Nash
                     equilibrium in such a model can be computed in
                     polynomial time if every manufacturer tries to modify
                     its product in a round robin manner. To further improve
                     the efficiency of the computation, we also design two
                     algorithms for the manufacturers to efficiently find
                     their best response to other products in the market.},
  bibdate         = {Fri Apr 24 17:59:51 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Kriegel:2009:CHD,
  author          = {Hans-Peter Kriegel and Peer Kr{\"o}ger and Arthur
                     Zimek},
  title           = {Clustering high-dimensional data: a survey on subspace
                     clustering, pattern-based clustering, and correlation
                     clustering},
  journal         = j-TKDD,
  year            = {2009},
  month           = mar,
  volume          = {3},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:??},
  doi             = {https://doi.org/10.1145/1497577.1497578},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {clustering; high-dimensional data; Survey},
  abstract        = {As a prolific research area in data mining, subspace
                     clustering and related problems induced a vast quantity
                     of proposed solutions. However, many publications
                     compare a new proposition --- if at all --- with one or
                     two competitors, or even with a so-called
                     ``na{\"\i}ve'' ad hoc solution, but fail to clarify the
                     exact problem definition. As a consequence, even if two
                     solutions are thoroughly compared experimentally, it
                     will often remain unclear whether both solutions tackle
                     the same problem or, if they do, whether they agree in
                     certain tacit assumptions and how such assumptions may
                     influence the outcome of an algorithm. In this survey,
                     we try to clarify: (i) the different problem
                     definitions related to subspace clustering in general;
                     (ii) the specific difficulties encountered in this
                     field of research; (iii) the varying assumptions,
                     heuristics, and intuitions forming the basis of
                     different approaches; and (iv) how several prominent
                     solutions tackle different problems.},
  bibdate         = {Fri Apr 24 18:00:01 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Dhurandhar:2009:SAM,
  author          = {Amit Dhurandhar and Alin Dobra},
  title           = {Semi-analytical method for analyzing models and model
                     selection measures based on moment analysis},
  journal         = j-TKDD,
  year            = {2009},
  month           = mar,
  volume          = {3},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:??},
  doi             = {https://doi.org/10.1145/1497577.1497579},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {classification; generalization error; Model
                     selection},
  abstract        = {In this article we propose a moment-based method for
                     studying models and model selection measures. By
                     focusing on the probabilistic space of classifiers
                     induced by the classification algorithm rather than on
                     that of datasets, we obtain efficient characterizations
                     for computing the moments, which is followed by
                     visualization of the resulting formulae that are too
                     complicated for direct interpretation. By assuming the
                     data to be drawn independently and identically
                     distributed from the underlying probability
                     distribution, and by going over the space of all
                     possible datasets, we establish general relationships
                     between the generalization error, hold-out-set error,
                     cross-validation error, and leave-one-out error. We
                     later exemplify the method and the results by studying
                     the behavior of the errors for the naive Bayes
                     classifier.},
  bibdate         = {Fri Apr 24 18:00:01 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Cerf:2009:CPM,
  author =       "Lo{\"\i}c Cerf and J{\'e}r{\'e}my Besson and
                 C{\'e}line Robardet and Jean-Fran{\c{c}}ois Boulicaut",
  title =        "Closed patterns meet {$n$}-ary relations",
  journal =      j-TKDD,
  volume =       "3",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1497577.1497580",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Fri Apr 24 18:00:01 MDT 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Set pattern discovery from binary relations has been
                 extensively studied during the last decade. In
                 particular, many complete and efficient algorithms for
                 frequent closed set mining are now available.
                 Generalizing such a task to $n$-ary relations ($ n \geq
                 2$) appears as a timely challenge. It may be important
                 for many applications, for example, when adding the
                 time dimension to the popular {\em objects\/} $ \times
                 $ {\em features\/} binary case. The generality of the
                 task (no assumption being made on the relation arity or
                 on the size of its attribute domains) makes it
                 computationally challenging. We introduce an algorithm
                 called Data-Peeler. From an $n$-ary relation, it
                 extracts all closed $n$-sets satisfying given piecewise
                 (anti) monotonic constraints. This new class of
                 constraints generalizes both monotonic and
                 antimonotonic constraints. Considering the special case
                 of ternary relations, Data-Peeler outperforms the
                 state-of-the-art algorithms CubeMiner and Trias by
                 orders of magnitude. These good performances must be
                 granted to a new clever enumeration strategy allowing
                 to efficiently enforce the closeness property. The
                 relevance of the extracted closed $n$-sets is assessed
                 on real-life 3- and 4-ary relations. Beyond natural 3- or
                 4-ary relations, expanding a relation with an
                 additional attribute can help in enforcing rather
                 abstract constraints such as the robustness with
                 respect to binarization. Furthermore, a collection of
                 closed $n$-sets is shown to be an excellent starting
                 point to compute a tiling of the dataset.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "$n$-ary relations; Closed patterns; constraint
                 properties; constraint-based mining; tiling",
}

@article{Angiulli:2009:DEA,
  author          = {Fabrizio Angiulli and Fabio Fassetti},
  title           = {{DOLPHIN}: an efficient algorithm for mining
                     distance-based outliers in very large datasets},
  journal         = j-TKDD,
  year            = {2009},
  month           = mar,
  volume          = {3},
  number          = {1},
  articleno       = {4},
  pages           = {4:1--4:??},
  doi             = {https://doi.org/10.1145/1497577.1497581},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {Data mining; distance-based outliers; outlier
                     detection},
  abstract        = {In this work a novel distance-based outlier detection
                     algorithm, named DOLPHIN, working on disk-resident
                     datasets and whose I/O cost corresponds to the cost of
                     sequentially reading the input dataset file twice, is
                     presented.\par

                     It is both theoretically and empirically shown that the
                     main memory usage of DOLPHIN amounts to a small
                     fraction of the dataset and that DOLPHIN has linear
                     time performance with respect to the dataset size.
                     DOLPHIN gains efficiency by naturally merging together
                     in a unified schema three strategies, namely the
                     selection policy of objects to be maintained in main
                     memory, usage of pruning rules, and similarity search
                     techniques. Importantly, similarity search is
                     accomplished by the algorithm without the need of
                     preliminarily indexing the whole dataset, as other
                     methods do.\par

                     The algorithm is simple to implement and it can be used
                     with any type of data, belonging to either metric or
                     nonmetric spaces. Moreover, a modification to the basic
                     method allows DOLPHIN to deal with the scenario in
                     which the available buffer of main memory is smaller
                     than its standard requirements. DOLPHIN has been
                     compared with state-of-the-art distance-based outlier
                     detection algorithms, showing that it is much more
                     efficient.},
  bibdate         = {Fri Apr 24 18:00:01 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Chen:2009:BAS,
  author          = {Bee-Chung Chen and Raghu Ramakrishnan and Jude W.
                     Shavlik and Pradeep Tamma},
  title           = {Bellwether analysis: {Searching} for cost-effective
                     query-defined predictors in large databases},
  journal         = j-TKDD,
  year            = {2009},
  month           = mar,
  volume          = {3},
  number          = {1},
  articleno       = {5},
  pages           = {5:1--5:??},
  doi             = {https://doi.org/10.1145/1497577.1497582},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {bellwether; Cost-effective prediction; data cube; OLAP
                     queries; predictive models; scalable algorithms},
  abstract        = {How to mine massive datasets is a challenging problem
                     with great potential value. Motivated by this
                     challenge, much effort has concentrated on developing
                     scalable versions of machine learning algorithms.
                     However, the cost of mining large datasets is not just
                     computational; preparing the datasets into the ``right
                     form'' so that learning algorithms can be applied is
                     usually costly, due to the human labor that is
                     typically required and a large number of choices in
                     data preparation, which include selecting different
                     subsets of data and aggregating data at different
                     granularities. We make the key observation that, for a
                     number of practically motivated problems, these choices
                     can be defined using database queries and analyzed in
                     an automatic and systematic manner. Specifically, we
                     propose a new class of data-mining problem, called {\em
                     bellwether analysis}, in which the goal is to find a
                     few query-defined predictors (e.g., first week sales of
                     Peoria, IL of an item) that can be used to accurately
                     predict the result of a target query (e.g., first year
                     worldwide sales of the item) from a large number of
                     queries that define candidate predictors. To make a
                     prediction for a new item, the data needed to generate
                     such predictors has to be collected (e.g., selling the
                     new item in Peoria, IL for a week and collecting the
                     sales data). A useful predictor is one that has high
                     prediction accuracy and a low data-collection cost. We
                     call such a cost-effective predictor a {\em
                     bellwether}.\par

                     This article introduces bellwether analysis, which
                     integrates database query processing and predictive
                     modeling into a single framework, and provides scalable
                     algorithms for large datasets that cannot fit in main
                     memory. Through a series of extensive experiments, we
                     show that bellwethers do exist in real-world databases,
                     and that our computation techniques achieve good
                     efficiency on large datasets.},
  bibdate         = {Fri Apr 24 18:00:01 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Liu:2009:ISI,
  author          = {Huan Liu and John Salerno and Michael Young and Rakesh
                     Agrawal and Philip S. Yu},
  title           = {Introduction to special issue on social computing,
                     behavioral modeling, and prediction},
  journal         = j-TKDD,
  year            = {2009},
  month           = apr,
  volume          = {3},
  number          = {2},
  articleno       = {6},
  pages           = {6:1--6:??},
  doi             = {https://doi.org/10.1145/1514888.1514889},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Mehler:2009:ENC,
  author          = {Andrew Mehler and Steven Skiena},
  title           = {Expanding network communities from representative
                     examples},
  journal         = j-TKDD,
  year            = {2009},
  month           = apr,
  volume          = {3},
  number          = {2},
  articleno       = {7},
  pages           = {7:1--7:??},
  doi             = {https://doi.org/10.1145/1514888.1514890},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {artificial intelligence; community discovery; Discrete
                     mathematics; graph theory; news analysis; social
                     networks},
  abstract        = {We present an approach to leverage a small subset of a
                     coherent community within a social network into a much
                     larger, more representative sample. Our problem becomes
                     identifying a small conductance subgraph containing
                     many (but not necessarily all) members of the given
                     seed set. Starting with an initial seed set
                     representing a sample of a community, we seek to
                     discover as much of the full community as
                     possible.\par

                     We present a general method for network community
                     expansion, demonstrating that our methods work well in
                     expanding communities in real world networks starting
                     from small given seed groups (20 to 400 members). Our
                     approach is marked by incremental expansion from the
                     seeds with retrospective analysis to determine the
                     ultimate boundaries of our community. We demonstrate
                     how to increase the robustness of the general approach
                     through bootstrapping multiple random partitions of the
                     input set into seed and evaluation groups.\par

                     We go beyond statistical comparisons against gold
                     standards to careful subjective evaluations of our
                     expanded communities. This process explains the causes
                     of most disagreement between our expanded communities
                     and our gold-standards --- arguing that our expansion
                     methods provide more reliable communities than can be
                     extracted from reference sources/gazetteers such as
                     Wikipedia.},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Lin:2009:ACT,
  author          = {Yu-Ru Lin and Yun Chi and Shenghuo Zhu and Hari
                     Sundaram and Belle L. Tseng},
  title           = {Analyzing communities and their evolutions in dynamic
                     social networks},
  journal         = j-TKDD,
  year            = {2009},
  month           = apr,
  volume          = {3},
  number          = {2},
  articleno       = {8},
  pages           = {8:1--8:??},
  doi             = {https://doi.org/10.1145/1514888.1514891},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {Community; community net; evolution; evolution net;
                     nonnegative matrix factorization; soft membership},
  abstract        = {We discover communities from social network data and
                     analyze the community evolution. These communities are
                     inherent characteristics of human interaction in online
                     social networks, as well as paper citation networks.
                     Also, communities may evolve over time, due to changes
                     to individuals' roles and social status in the network
                     as well as changes to individuals' research interests.
                     We present an innovative algorithm that deviates from
                     the traditional two-step approach to analyze community
                     evolutions. In the traditional approach, communities
                     are first detected for each time slice, and then
                     compared to determine correspondences. We argue that
                     this approach is inappropriate in applications with
                     noisy data. In this paper, we propose {\em FacetNet\/}
                     for analyzing communities and their evolutions through
                     a robust {\em unified\/} process. This novel framework
                     will discover communities and capture their evolution
                     with temporal smoothness given by historic community
                     structures. Our approach relies on formulating the
                     problem in terms of maximum a posteriori (MAP)
                     estimation, where the community structure is estimated
                     both by the observed networked data and by the prior
                     distribution given by historic community structures.
                     Then we develop an iterative algorithm, with proven low
                     time complexity, which is guaranteed to converge to an
                     optimal solution. We perform extensive experimental
                     studies, on both synthetic datasets and real datasets,
                     to demonstrate that our method discovers meaningful
                     communities and provides additional insights not
                     directly obtainable from traditional methods.},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Kimura:2009:BLM,
  author          = {Masahiro Kimura and Kazumi Saito and Hiroshi Motoda},
  title           = {Blocking links to minimize contamination spread in a
                     social network},
  journal         = j-TKDD,
  year            = {2009},
  month           = apr,
  volume          = {3},
  number          = {2},
  articleno       = {9},
  pages           = {9:1--9:??},
  doi             = {https://doi.org/10.1145/1514888.1514892},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {Contamination diffusion; link analysis; social
                     networks},
  abstract        = {We address the problem of minimizing the propagation
                     of undesirable things, such as computer viruses or
                     malicious rumors, by blocking a limited number of links
                     in a network, which is converse to the influence
                     maximization problem in which the most influential
                     nodes for information diffusion is searched in a social
                     network. This minimization problem is more fundamental
                     than the problem of preventing the spread of
                     contamination by removing nodes in a network. We
                     introduce two definitions for the contamination degree
                     of a network, accordingly define two contamination
                     minimization problems, and propose methods for
                     efficiently finding good approximate solutions to these
                     problems on the basis of a naturally greedy strategy.
                     Using large social networks, we experimentally
                     demonstrate that the proposed methods outperform
                     conventional link-removal methods. We also show that
                     unlike the case of blocking a limited number of nodes,
                     the strategy of removing nodes with high out-degrees is
                     not necessarily effective for these problems.},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Agichtein:2009:MIS,
  author          = {Eugene Agichtein and Yandong Liu and Jiang Bian},
  title           = {Modeling information-seeker satisfaction in community
                     question answering},
  journal         = j-TKDD,
  year            = {2009},
  month           = apr,
  volume          = {3},
  number          = {2},
  articleno       = {10},
  pages           = {10:1--10:??},
  doi             = {https://doi.org/10.1145/1514888.1514893},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  coden           = {????},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  keywords        = {Community question answering; information seeker
                     satisfaction},
  abstract        = {Question Answering Communities such as Naver, Baidu
                     Knows, and Yahoo! Answers have emerged as popular, and
                     often effective, means of information seeking on the
                     web. By posting questions for other participants to
                     answer, information seekers can obtain specific answers
                     to their questions. Users of CQA portals have already
                     contributed millions of questions, and received
                     hundreds of millions of answers from other
                     participants. However, CQA is not always effective: in
                     some cases, a user may obtain a perfect answer within
                     minutes, and in others it may require hours --- and
                     sometimes days --- until a satisfactory answer is
                     contributed. We investigate the problem of predicting
                     information seeker satisfaction in collaborative
                     question answering communities, where we attempt to
                     predict whether a question author will be satisfied
                     with the answers submitted by the community
                     participants. We present a general prediction model,
                     and develop a variety of content, structure, and
                     community-focused features for this task. Our
                     experimental results, obtained from a large-scale
                     evaluation over thousands of real questions and user
                     ratings, demonstrate the feasibility of modeling and
                     predicting asker satisfaction. We complement our
                     results with a thorough investigation of the
                     interactions and information seeking patterns in
                     question answering communities that correlate with
                     information seeker satisfaction. We also explore {\em
                     personalized\/} models of asker satisfaction, and show
                     that when sufficient interaction history exists,
                     personalization can significantly improve prediction
                     accuracy over a ``one-size-fits-all'' model. Our models
                     and predictions could be useful for a variety of
                     applications, such as user intent inference, answer
                     ranking, interface design, and query suggestion and
                     routing.},
  bibdate         = {Fri Apr 24 18:00:12 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD 3(3), article 11 (July 2009): the Author-ity model for
%%% disambiguating author names in MEDLINE.
@article{Torvik:2009:AND,
  author = {Vetle I. Torvik and Neil R. Smalheiser},
  title = {Author name disambiguation in {MEDLINE}},
  journal = j-TKDD,
  volume = {3},
  number = {3},
  pages = {11:1--11:??},
  month = jul,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1552303.1552304},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  bibdate = {Tue Mar 16 18:36:58 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {{\em Background\/}: We recently described
    ``Author-ity,'' a model for estimating the probability
    that two articles in MEDLINE, sharing the same author
    name, were written by the same individual. Features
    include shared title words, journal name, coauthors,
    medical subject headings, language, affiliations, and
    author name features (middle initial, suffix, and
    prevalence in MEDLINE). Here we test the hypothesis
    that the Author-ity model will suffice to disambiguate
    author names for the vast majority of articles in
    MEDLINE. {\em Methods\/}: Enhancements include: (a)
    incorporating first names and their variants, email
    addresses, and correlations between specific last names
    and affiliation words; (b) new methods of generating
    large unbiased training sets; (c) new methods for
    estimating the prior probability; (d) a weighted least
    squares algorithm for correcting transitivity
    violations; and (e) a maximum likelihood based
    agglomerative algorithm for computing clusters of
    articles that represent inferred author-individuals.
    {\em Results\/}: Pairwise comparisons were computed for
    all author names on all 15.3 million articles in
    MEDLINE (2006 baseline), that share last name and first
    initial, to create Author-ity 2006, a database that has
    each name on each article assigned to one of 6.7
    million inferred author-individual clusters. Recall is
    estimated at $ \approx 98.8 \% $. Lumping (putting two
    different individuals into the same cluster) affects $
    \approx 0.5 \% $ of clusters, whereas splitting
    (assigning articles written by the same individual to $
    > 1 $ cluster) affects $ \approx 2 \% $ of articles.
    {\em Impact\/}: The Author-ity model can be applied
    generally to other bibliographic databases. Author name
    disambiguation allows information retrieval and data
    integration to become {\em person-centered}, not just
    {\em document-centered}, setting the stage for new data
    mining and social network tools that will facilitate
    the analysis of scholarly publishing and collaboration
    behavior. {\em Availability\/}: The Author-ity 2006
    database is available for nonprofit academic research,
    and can be freely queried via
    http://arrowsmith.psych.uic.edu.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {11},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  keywords = {bibliographic databases; Name disambiguation},
}

%%% TKDD 3(3), article 12 (July 2009): D-Stream, density-based
%%% clustering of stream data using grids and attraction.
@Article{Tu:2009:SDC,
  author =       "Li Tu and Yixin Chen",
  title =        "Stream data clustering based on grid density and
                 attraction",
  journal =      j-TKDD,
  volume =       "3",
  number =       "3",
  pages =        "12:1--12:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1552303.1552305",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:36:58 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Clustering real-time stream data is an important and
                 challenging problem. Existing algorithms such as
                 CluStream are based on the {\em k\/}-means algorithm.
                 These clustering algorithms have difficulties finding
                 clusters of arbitrary shapes and handling outliers.
                 Further, they require the knowledge of {\em k\/} and
                 user-specified time window. To address these issues,
                 this article proposes {\em D-Stream}, a framework for
                 clustering stream data using a density-based
                 approach.\par

                 Our algorithm uses an online component that maps each
                 input data record into a grid and an offline component
                 that computes the grid density and clusters the grids
                 based on the density. The algorithm adopts a density
                 decaying technique to capture the dynamic changes of a
                 data stream and an attraction-based mechanism to
                 accurately generate cluster boundaries.\par

                 Exploiting the intricate relationships among the decay
                 factor, attraction, data density, and cluster
                 structure, our algorithm can efficiently and
                 effectively generate and adjust the clusters in real
                 time. Further, a theoretically sound technique is
                 developed to detect and remove sporadic grids mapped by
                 outliers in order to dramatically improve the space and
                 time efficiency of the system. The technique makes
                 high-speed data stream clustering feasible without
                 degrading the clustering quality. The experimental
                 results show that our algorithm has superior quality
                 and efficiency, can find clusters of arbitrary shapes,
                 and can accurately recognize the evolving behaviors of
                 real-time data streams.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "clustering; data mining; density-based algorithms;
                 Stream data",
}

%%% TKDD 3(3), article 13 (July 2009): detecting link spam target
%%% pages by means of page farms and PageRank contributions.
@article{Zhou:2009:LST,
  author = {Bin Zhou and Jian Pei},
  title = {Link spam target detection using page farms},
  journal = j-TKDD,
  volume = {3},
  number = {3},
  pages = {13:1--13:??},
  month = jul,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1552303.1552306},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  bibdate = {Tue Mar 16 18:36:58 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Currently, most popular Web search engines adopt some
    link-based ranking methods such as PageRank. Driven by
    the huge potential benefit of improving rankings of Web
    pages, many tricks have been attempted to boost page
    rankings. The most common way, which is known as link
    spam, is to make up some artificially designed link
    structures. Detecting link spam effectively is a big
    challenge. In this article, we develop novel and
    effective detection methods for link spam target pages
    using page farms. The essential idea is intuitive:
    whether a page is the beneficiary of link spam is
    reflected by how it collects its PageRank score.
    Technically, how a target page collects its PageRank
    score is modeled by a page farm, which consists of
    pages contributing a major portion of the PageRank
    score of the target page. We propose two spamicity
    measures based on page farms. They can be used as an
    effective measure to check whether the pages are link
    spam target pages. An empirical study using a newly
    available real dataset strongly suggests that our
    method is effective. It outperforms the
    state-of-the-art methods like SpamRank and SpamMass in
    both precision and recall.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {13},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  keywords = {Link Spam; Page Farm; PageRank},
}

%%% TKDD 3(3), article 14 (July 2009): density-based clustering of
%%% evolving data streams at multiple resolutions.
@Article{Wan:2009:DBC,
  author =       "Li Wan and Wee Keong Ng and Xuan Hong Dang and Philip
                 S. Yu and Kuan Zhang",
  title =        "Density-based clustering of data streams at multiple
                 resolutions",
  journal =      j-TKDD,
  volume =       "3",
  number =       "3",
  pages =        "14:1--14:??",
  month =        jul,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1552303.1552307",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:36:58 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In data stream clustering, it is desirable to have
                 algorithms that are able to detect clusters of
                 arbitrary shape, clusters that evolve over time, and
                 clusters with noise. Existing stream data clustering
                 algorithms are generally based on an online-offline
                 approach: The online component captures synopsis
                 information from the data stream (thus, overcoming
                 real-time and memory constraints) and the offline
                 component generates clusters using the stored synopsis.
                 The online-offline approach affects the overall
                 performance of stream data clustering in various ways:
                 the ease of deriving synopsis from streaming data; the
                 complexity of data structure for storing and managing
                 synopsis; and the frequency at which the offline
                 component is used to generate clusters. In this
                 article, we propose an algorithm that (1) computes and
                 updates synopsis information in constant time; (2)
                 allows users to discover clusters at multiple
                 resolutions; (3) determines the right time for users to
                 generate clusters from the synopsis information; (4)
                 generates clusters of higher purity than existing
                 algorithms; and (5) determines the right threshold
                 function for density-based clustering based on the
                 fading model of stream data. To the best of our
                 knowledge, no existing data stream algorithm has all
                 of these features. Experimental results show that our
                 algorithm is able to detect arbitrarily shaped,
                 evolving clusters with high quality.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Data mining algorithms; density based clustering;
                 evolving data streams",
}

%%% TKDD 3(4), article 15 (November 2009): special-issue front
%%% matter for ACM SIGKDD 2007 and 2008; the entry carries no abstract.
@article{Mannila:2009:ATS,
  author = {Heikki Mannila and Dimitrios Gunopulos},
  title = {{ACM TKDD} special issue {ACM SIGKDD 2007} and {ACM
    SIGKDD 2008}},
  journal = j-TKDD,
  volume = {3},
  number = {4},
  pages = {15:1--15:??},
  month = nov,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1631162.1631163},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  bibdate = {Tue Mar 16 18:37:13 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {15},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD 3(4), article 16 (November 2009): event-based
%%% characterization of how interaction graphs evolve over time.
@article{Asur:2009:EBF,
  author = {Sitaram Asur and Srinivasan Parthasarathy and Duygu
    Ucar},
  title = {An event-based framework for characterizing the
    evolutionary behavior of interaction graphs},
  journal = j-TKDD,
  volume = {3},
  number = {4},
  pages = {16:1--16:??},
  month = nov,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1631162.1631164},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  bibdate = {Tue Mar 16 18:37:13 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Interaction graphs are ubiquitous in many fields such
    as bioinformatics, sociology and physical sciences.
    There have been many studies in the literature targeted
    at studying and mining these graphs. However, almost
    all of them have studied these graphs from a static
    point of view. The study of the evolution of these
    graphs over time can provide tremendous insight on the
    behavior of entities, communities and the flow of
    information among them. In this work, we present an
    event-based characterization of critical behavioral
    patterns for temporally varying interaction graphs. We
    use nonoverlapping snapshots of interaction graphs and
    develop a framework for capturing and identifying
    interesting events from them. We use these events to
    characterize complex behavioral patterns of individuals
    and communities over time. We show how semantic
    information can be incorporated to reason about
    community-behavior events. We also demonstrate the
    application of behavioral patterns for the purposes of
    modeling evolution, link prediction and influence
    maximization. Finally, we present a diffusion model for
    evolving networks, based on our framework.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {16},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  keywords = {diffusion of innovations; Dynamic interaction
    networks; evolutionary analysis},
}

%%% TKDD 3(4), article 17 (November 2009): evolutionary spectral
%%% clustering with temporal smoothness.
@Article{Chi:2009:ESC,
  author =       "Yun Chi and Xiaodan Song and Dengyong Zhou and Koji
                 Hino and Belle L. Tseng",
  title =        "On evolutionary spectral clustering",
  journal =      j-TKDD,
  volume =       "3",
  number =       "4",
  pages =        "17:1--17:??",
  month =        nov,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1631162.1631165",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:13 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Evolutionary clustering is an emerging research area
                 essential to important applications such as clustering
                 dynamic Web and blog contents and clustering data
                 streams. In evolutionary clustering, a good clustering
                 result should fit the current data well, while
                 simultaneously not deviate too dramatically from the
                 recent history. To fulfill this dual purpose, a measure
                 of {\em temporal smoothness\/} is integrated in the
                 overall measure of clustering quality. In this article,
                 we propose two frameworks that incorporate temporal
                 smoothness in evolutionary spectral clustering. For
                 both frameworks, we start with intuitions gained from
                 the well-known {\em k\/}-means clustering problem, and
                 then propose and solve corresponding cost functions for
                 the evolutionary spectral clustering problems. Our
                 solutions to the evolutionary spectral clustering
                 problems provide more stable and consistent clustering
                 results that are less sensitive to short-term noises
                 while at the same time are adaptive to long-term
                 cluster drifts. Furthermore, we demonstrate that our
                 methods provide the optimal solutions to the relaxed
                 versions of the corresponding evolutionary
                 {\em k\/}-means clustering problems. Performance
                 experiments over a number of real and synthetic data
                 sets illustrate our evolutionary spectral clustering
                 methods provide more robust clustering results that
                 are not sensitive to noise and can adapt to data
                 drifts.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Evolutionary spectral clustering; preserving cluster
                 membership; preserving cluster quality; temporal
                 smoothness",
}

%%% TKDD 3(4), article 18 (November 2009): SPIRAL, a fast
%%% likelihood search method for hidden Markov model datasets.
@Article{Fujiwara:2009:FLS,
  author =       "Yasuhiro Fujiwara and Yasushi Sakurai and Masaru
                 Kitsuregawa",
  title =        "Fast likelihood search for hidden {Markov} models",
  journal =      j-TKDD,
  volume =       "3",
  number =       "4",
  pages =        "18:1--18:??",
  month =        nov,
  year =         "2009",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1631162.1631166",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:13 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Hidden Markov models (HMMs) are receiving considerable
                 attention in various communities and many applications
                 that use HMMs have emerged such as mental task
                 classification, biological analysis, traffic
                 monitoring, and anomaly detection. This article has two
                 goals: the first goal is exact and efficient
                 identification of the model whose state sequence has
                 the highest likelihood for the given query sequence
                 (more precisely, no HMM that actually has a
                 high-probability path for the given sequence is missed
                 by the algorithm), and the second goal is exact and
                 efficient monitoring of streaming data sequences to
                 find the best model. We propose SPIRAL, a fast search
                 method for HMM datasets. SPIRAL is based on three
                 ideas: (1) it clusters states of models to compute
                 approximate likelihood, (2) it uses several
                 granularities and approximates likelihood values in
                 search processing, and (3) it focuses on just the
                 promising likelihood computations by pruning out
                 low-likelihood state sequences. Experiments verify the
                 effectiveness of SPIRAL and show that it is more than
                 490 times faster than the naive method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Hidden Markov model; likelihood; upper bound",
}

%%% TKDD 3(4), article 19 (November 2009): FastANOVA, batch ANOVA
%%% testing of SNP-pairs for genome-wide association study.
@article{Zhang:2009:EAG,
  author = {Xiang Zhang and Fei Zou and Wei Wang},
  title = {Efficient algorithms for genome-wide association
    study},
  journal = j-TKDD,
  volume = {3},
  number = {4},
  pages = {19:1--19:??},
  month = nov,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1631162.1631167},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  bibdate = {Tue Mar 16 18:37:13 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Studying the association between quantitative
    phenotype (such as height or weight) and single
    nucleotide polymorphisms (SNPs) is an important problem
    in biology. To understand underlying mechanisms of
    complex phenotypes, it is often necessary to consider
    joint genetic effects across multiple SNPs. ANOVA
    (analysis of variance) test is routinely used in
    association study. Important findings from studying
    gene-gene (SNP-pair) interactions are appearing in the
    literature. However, the number of SNPs can be up to
    millions. Evaluating joint effects of SNPs is a
    challenging task even for SNP-pairs. Moreover, with
    large number of SNPs correlated, permutation procedure
    is preferred over simple Bonferroni correction for
    properly controlling family-wise error rate and
    retaining mapping power, which dramatically increases
    the computational cost of association study.\par

    In this article, we study the problem of finding
    SNP-pairs that have significant associations with a
    given quantitative phenotype. We propose an efficient
    algorithm, FastANOVA, for performing ANOVA tests on
    SNP-pairs in a batch mode, which also supports large
    permutation test. We derive an upper bound of SNP-pair
    ANOVA test, which can be expressed as the sum of two
    terms. The first term is based on single-SNP ANOVA
    test. The second term is based on the SNPs and
    independent of any phenotype permutation. Furthermore,
    SNP-pairs can be organized into groups, each of which
    shares a common upper bound. This allows for maximum
    reuse of intermediate computation, efficient upper
    bound estimation, and effective SNP-pair pruning.
    Consequently, FastANOVA only needs to perform the ANOVA
    test on a small number of candidate SNP-pairs without
    the risk of missing any significant ones. Extensive
    experiments demonstrate that FastANOVA is orders of
    magnitude faster than the brute-force implementation of
    ANOVA tests on all SNP pairs. The principles used in
    FastANOVA can be applied to categorical phenotypes and
    other statistics such as Chi-square test.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {19},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  keywords = {ANOVA test; Association study; permutation test},
}

%%% TKDD 3(4), article 20 (November 2009): "reflect and correct",
%%% label acquisition for active inference in networked data.
@article{Bilgic:2009:RCM,
  author = {Mustafa Bilgic and Lise Getoor},
  title = {Reflect and correct: a misclassification prediction
    approach to active inference},
  journal = j-TKDD,
  volume = {3},
  number = {4},
  pages = {20:1--20:??},
  month = nov,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1631162.1631168},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  bibdate = {Tue Mar 16 18:37:13 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Information diffusion, viral marketing, graph-based
    semi-supervised learning, and collective classification
    all attempt to model and exploit the relationships
    among nodes in a network to improve the performance of
    node labeling algorithms. However, sometimes the
    advantage of exploiting the relationships can become a
    disadvantage. Simple models like label propagation and
    iterative classification can aggravate a
    misclassification by propagating mistakes in the
    network, while more complex models that define and
    optimize a global objective function, such as Markov
    random fields and graph mincuts, can misclassify a set
    of nodes jointly. This problem can be mitigated if the
    classification system is allowed to ask for the correct
    labels for a few of the nodes during inference.
    However, determining the optimal set of labels to
    acquire is intractable under relatively general
    assumptions, which forces us to resort to approximate
    and heuristic techniques. We describe three such
    techniques in this article. The first one is based on
    directly approximating the value of the objective
    function of label acquisition and greedily acquiring
    the label that provides the most improvement. The
    second technique is a simple technique based on the
    analogy we draw between viral marketing and label
    acquisition. Finally, we propose a method, which we
    refer to as {\em reflect and correct}, that can learn
    and predict when the classification system is likely to
    make mistakes and suggests acquisitions to correct
    those mistakes. We empirically show on a variety of
    synthetic and real-world datasets that the reflect and
    correct method significantly outperforms the other two
    techniques, as well as other approaches based on
    network structural measures such as node degree and
    network clustering.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {20},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  keywords = {Active inference; collective classification;
    information diffusion; label acquisition; viral
    marketing},
}

%%% TKDD 3(4), article 21 (November 2009): short, comprehensive
%%% summaries of large event sequences.
@article{Kiernan:2009:CCS,
  author = {Jerry Kiernan and Evimaria Terzi},
  title = {Constructing comprehensive summaries of large event
    sequences},
  journal = j-TKDD,
  volume = {3},
  number = {4},
  pages = {21:1--21:??},
  month = nov,
  year = {2009},
  coden = {????},
  doi = {https://doi.org/10.1145/1631162.1631169},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  bibdate = {Tue Mar 16 18:37:13 MDT 2010},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Event sequences capture system and user activity over
    time. Prior research on sequence mining has mostly
    focused on discovering local patterns appearing in a
    sequence. While interesting, these patterns do not give
    a comprehensive summary of the entire event sequence.
    Moreover, the number of patterns discovered can be
    large. In this article, we take an alternative approach
    and build {\em short\/} summaries that describe an
    entire sequence, and discover local dependencies
    between event types.\par

    We formally define the summarization problem as an
    optimization problem that balances shortness of the
    summary with accuracy of the data description. We show
    that this problem can be solved optimally in polynomial
    time by using a combination of two dynamic-programming
    algorithms. We also explore more efficient greedy
    alternatives and demonstrate that they work well on
    large datasets. Experiments on both synthetic and real
    datasets illustrate that our algorithms are efficient
    and produce high-quality results, and reveal
    interesting local structures in the data.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {21},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  keywords = {Event sequences; log mining; summarization},
}

@Article{Koren:2010:FNS,
  author =       "Yehuda Koren",
  title =        "Factor in the neighbors: {Scalable} and accurate
                 collaborative filtering",
  journal =      j-TKDD,
  volume =       "4",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644873.1644874",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:37 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Recommender systems provide users with personalized
                 suggestions for products or services. These systems
                 often rely on collaborative filtering (CF), where past
                 transactions are analyzed in order to establish
                 connections between users and products. The most common
                 approach to CF is based on neighborhood models, which
                 originate from similarities between products or users.
                 In this work we introduce a new neighborhood model with
                 an improved prediction accuracy. Unlike previous
                 approaches that are based on heuristic similarities, we
                 model neighborhood relations by minimizing a global
                 cost function. Further accuracy improvements are
                 achieved by extending the model to exploit both
                 explicit and implicit feedback by the users. Past
                 models were limited by the need to compute all pairwise
                 similarities between items or users, which grow
                 quadratically with input size. In particular, this
                 limitation vastly complicates adopting user similarity
                 models, due to the typical large number of users. Our
                 new model solves these limitations by factoring the
                 neighborhood model, thus making both item-item and
                 user-user implementations scale linearly with the size
                 of the data. The methods are tested on the Netflix
                 data, with encouraging results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "collaborative filtering; Netflix Prize; Recommender
                 systems",
}

@Article{Syed:2010:MDP,
  author =       "Zeeshan Syed and Collin Stultz and Manolis Kellis and
                 Piotr Indyk and John Guttag",
  title =        "Motif discovery in physiological datasets: a
                 methodology for inferring predictive elements",
  journal =      j-TKDD,
  volume =       "4",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644873.1644875",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:37 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In this article, we propose a methodology for
                 identifying predictive physiological patterns in the
                 absence of prior knowledge. We use the principle of
                 conservation to identify activity that consistently
                 precedes an outcome in patients, and describe a
                 two-stage process that allows us to efficiently search
                 for such patterns in large datasets. This involves
                 first transforming continuous physiological signals
                 from patients into symbolic sequences, and then
                 searching for patterns in these reduced representations
                 that are strongly associated with an outcome.\par

                 Our strategy of identifying conserved activity that is
                 unlikely to have occurred purely by chance in symbolic
                 data is analogous to the discovery of regulatory motifs
                 in genomic datasets. We build upon existing work in
                 this area, generalizing the notion of a regulatory
                 motif and enhancing current techniques to operate
                 robustly on non-genomic data. We also address two
                 significant considerations associated with motif
                 discovery in general: computational efficiency and
                 robustness in the presence of degeneracy and noise. To
                 deal with these issues, we introduce the concept of
                 active regions and new subset-based techniques such as
                 a two-layer Gibbs sampling algorithm. These extensions
                 allow for a framework for information inference, where
                 precursors are identified as approximately conserved
                 activity of arbitrary complexity preceding multiple
                 occurrences of an event.\par

                 We evaluated our solution on a population of patients
                 who experienced sudden cardiac death and attempted to
                 discover electrocardiographic activity that may be
                 associated with the endpoint of death. To assess the
                 predictive patterns discovered, we compared likelihood
                 scores for motifs in the sudden death population
                 against control populations of normal individuals and
                 those with non-fatal supraventricular arrhythmias. Our
                 results suggest that predictive motif discovery may be
                 able to identify clinically relevant information even
                 in the absence of significant prior knowledge.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "data mining; Gibbs sampling; inference; knowledge
                 discovery; motifs; physiological signals",
}

@Article{Webb:2010:SSI,
  author =       "Geoffrey I. Webb",
  title =        "Self-sufficient itemsets: an approach to screening
                 potentially interesting associations between items",
  journal =      j-TKDD,
  volume =       "4",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644873.1644876",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:37 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Self-sufficient itemsets are those whose frequency
                 cannot be explained solely by the frequency of either
                 their subsets or of their supersets. We argue that
                 itemsets that are not self-sufficient will often be of
                 little interest to the data analyst, as their frequency
                 should be expected once that of the itemsets on which
                 their frequency depends is known. We present tests for
                 statistically sound discovery of self-sufficient
                 itemsets, and computational techniques that allow those
                 tests to be applied as a post-processing step for any
                 itemset discovery algorithm. We also present a measure
                 for assessing the degree of potential interest in an
                 itemset that complements these statistical measures.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Association discovery; association rules; itemset
                 discovery; itemset screening; statistical evaluation",
}

@Article{Plantevit:2010:MMM,
  author =       "Marc Plantevit and Anne Laurent and Dominique Laurent
                 and Maguelonne Teisseire and Yeow Wei Choong",
  title =        "Mining multidimensional and multilevel sequential
                 patterns",
  journal =      j-TKDD,
  volume =       "4",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644873.1644877",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:37 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Multidimensional databases have been designed to
                 provide decision makers with the necessary tools to
                 help them understand their data. This framework is
                 different from transactional data as the datasets
                 contain huge volumes of historicized and aggregated
                 data defined over a set of dimensions that can be
                 arranged through multiple levels of granularities. Many
                 tools have been proposed to query the data and navigate
                 through the levels of granularity. However, automatic
                 tools are still missing to mine this type of data in
                 order to discover regular specific patterns. In this
                 article, we present a method for mining sequential
                 patterns from multidimensional databases, at the same
                 time taking advantage of the different dimensions and
                 levels of granularity, which is original compared to
                 existing work. The necessary definitions and algorithms
                 are extended from regular sequential patterns to this
                 particular case. Experiments are reported, showing the
                 significance of this approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "frequent patterns; hierarchy; multidimensional
                 databases; multilevel patterns; Sequential patterns",
}

@Article{Zaki:2010:VVO,
  author =       "Mohammed J. Zaki and Christopher D. Carothers and
                 Boleslaw K. Szymanski",
  title =        "{VOGUE}: a variable order hidden {Markov} model with
                 duration based on frequent sequence mining",
  journal =      j-TKDD,
  volume =       "4",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1644873.1644878",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Tue Mar 16 18:37:37 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We present VOGUE, a novel, variable order hidden
                 Markov model with state durations, that combines two
                 separate techniques for modeling complex patterns in
                 sequential data: pattern mining and data modeling.
                 VOGUE relies on a variable gap sequence mining method
                 to extract frequent patterns with different lengths and
                 gaps between elements. It then uses these mined
                 sequences to build a variable order hidden Markov model
                 (HMM), that explicitly models the gaps. The gaps
                 implicitly model the order of the HMM, and they
                 explicitly model the duration of each state. We apply
                 VOGUE to a variety of real sequence data taken from
                 domains such as protein sequence classification, Web
                 usage logs, intrusion detection, and spelling
                 correction. We show that VOGUE has superior
                 classification accuracy compared to regular HMMs,
                 higher-order HMMs, and even special purpose HMMs like
                 HMMER, which is a state-of-the-art method for protein
                 classification. The VOGUE implementation and the
                 datasets used in this article are available as
                 open-source.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "Hidden Markov models; higher-order HMM; HMM with
                 duration; sequence mining and modeling; variable-order
                 HMM",
}

@Article{Vadera:2010:CCS,
  author =       "Sunil Vadera",
  title =        "{CSNL}: a cost-sensitive non-linear decision tree
                 algorithm",
  journal =      j-TKDD,
  volume =       "4",
  number =       "2",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1754428.1754429",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Sat Aug 14 17:12:30 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This article presents a new decision tree learning
                 algorithm called CSNL that induces Cost-Sensitive
                 Non-Linear decision trees. The algorithm is based on
                 the hypothesis that nonlinear decision nodes provide a
                 better basis than axis-parallel decision nodes and
                 utilizes discriminant analysis to construct nonlinear
                 decision trees that take account of costs of
                 misclassification.\par

                 The performance of the algorithm is evaluated by
                 applying it to seventeen datasets and the results are
                 compared with those obtained by two well known
                 cost-sensitive algorithms, ICET and MetaCost, which
                 generate multiple trees to obtain some of the best
                 results to date. The results show that CSNL performs at
                 least as well, if not better than these algorithms, in
                 more than twelve of the datasets and is considerably
                 faster. The use of bagging with CSNL further enhances
                 its performance showing the significant benefits of
                 using nonlinear decision nodes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "cost-sensitive learning; Decision tree learning",
}

@Article{Kandylas:2010:AKC,
  author =       "Vasileios Kandylas and S. Phineas Upham and Lyle H.
                 Ungar",
  title =        "Analyzing knowledge communities using foreground and
                 background clusters",
  journal =      j-TKDD,
  volume =       "4",
  number =       "2",
  pages =        "7:1--7:??",
  month =        may,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1754428.1754430",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Sat Aug 14 17:12:30 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Insight into the growth (or shrinkage) of ``knowledge
                 communities'' of authors that build on each other's
                 work can be gained by studying the evolution over time
                 of clusters of documents. We cluster documents based on
                 the documents they cite in common using the Streemer
                 clustering method, which finds cohesive foreground
                 clusters (the knowledge communities) embedded in a
                 diffuse background. We build predictive models with
                 features based on the citation structure, the
                 vocabulary of the papers, and the affiliations and
                 prestige of the authors and use these models to study
                 the drivers of community growth and the predictors of
                 how widely a paper will be cited. We find that
                 scientific knowledge communities tend to grow more
                 rapidly if their publications build on diverse
                 information and use narrow vocabulary and that papers
                 that lie on the periphery of a community have the
                 highest impact, while those not in any community have
                 the lowest impact.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "citation analysis; clustering; community evolution;
                 knowledge communities; Text mining",
}

@Article{Ji:2010:SSL,
  author =       "Shuiwang Ji and Lei Tang and Shipeng Yu and Jieping
                 Ye",
  title =        "A shared-subspace learning framework for multi-label
                 classification",
  journal =      j-TKDD,
  volume =       "4",
  number =       "2",
  pages =        "8:1--8:??",
  month =        may,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1754428.1754431",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Sat Aug 14 17:12:30 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Multi-label problems arise in various domains such as
                 multi-topic document categorization, protein function
                 prediction, and automatic image annotation. One natural
                 way to deal with such problems is to construct a binary
                 classifier for each label, resulting in a set of
                 independent binary classification problems. Since
                 multiple labels share the same input space, and the
                 semantics conveyed by different labels are usually
                 correlated, it is essential to exploit the correlation
                 information contained in different labels. In this
                 paper, we consider a general framework for extracting
                 shared structures in multi-label classification. In
                 this framework, a common subspace is assumed to be
                 shared among multiple labels. We show that the optimal
                 solution to the proposed formulation can be obtained by
                 solving a generalized eigenvalue problem, though the
                 problem is nonconvex. For high-dimensional problems,
                 direct computation of the solution is expensive, and we
                 develop an efficient algorithm for this case. One
                 appealing feature of the proposed framework is that it
                 includes several well-known algorithms as special
                 cases, thus elucidating their intrinsic relationships.
                 We further show that the proposed framework can be
                 extended to the kernel-induced feature space. We have
                 conducted extensive experiments on multi-topic web page
                 categorization and automatic gene expression pattern
                 image annotation tasks, and results demonstrate the
                 effectiveness of the proposed formulation in comparison
                 with several representative algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "gene expression pattern image annotation; kernel
                 methods; least squares loss; Multi-label
                 classification; shared subspace; singular value
                 decomposition; web page categorization",
}

@Article{Ruggieri:2010:DMD,
  author =       "Salvatore Ruggieri and Dino Pedreschi and Franco
                 Turini",
  title =        "Data mining for discrimination discovery",
  journal =      j-TKDD,
  volume =       "4",
  number =       "2",
  pages =        "9:1--9:??",
  month =        may,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1754428.1754432",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Sat Aug 14 17:12:30 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In the context of civil rights law, discrimination
                 refers to unfair or unequal treatment of people based
                 on membership to a category or a minority, without
                 regard to individual merit. Discrimination in credit,
                 mortgage, insurance, labor market, and education has
                 been investigated by researchers in economics and human
                 sciences. With the advent of automatic decision support
                 systems, such as credit scoring systems, the ease of
                 data collection opens several challenges to data
                 analysts for the fight against discrimination. In this
                 article, we introduce the problem of discovering
                 discrimination through data mining in a dataset of
                 historical decision records, taken by humans or by
                 automatic systems. We formalize the processes of direct
                 and indirect discrimination discovery by modelling
                 protected-by-law groups and contexts where
                 discrimination occurs in a classification rule based
                 syntax. Basically, classification rules extracted from
                 the dataset allow for unveiling contexts of unlawful
                 discrimination, where the degree of burden over
                 protected-by-law groups is formalized by an extension
                 of the lift measure of a classification rule. In direct
                 discrimination, the extracted rules can be directly
                 mined in search of discriminatory contexts. In indirect
                 discrimination, the mining process needs some
                 background knowledge as a further input, for example,
                 census data, that combined with the extracted rules
                 might allow for unveiling contexts of discriminatory
                 decisions. A strategy adopted for combining extracted
                 classification rules with background knowledge is
                 called an inference model. In this article, we propose
                 two inference models and provide automatic procedures
                 for their implementation. An empirical assessment of
                 our results is provided on the German credit dataset
                 and on the PKDD Discovery Challenge 1999 financial
                 dataset.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  keywords =     "classification rules; Discrimination",
}

@Article{Thomas:2010:MMF,
  author =       "Lini T. Thomas and Satyanarayana R. Valluri and
                 Kamalakar Karlapalem",
  title =        "{MARGIN}: {Maximal} frequent subgraph mining",
  journal =      j-TKDD,
  volume =       "4",
  number =       "3",
  pages =        "10:1--10:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1839490.1839491",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Mon Mar 28 11:43:57 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Deodhar:2010:SFS,
  author =       "Meghana Deodhar and Joydeep Ghosh",
  title =        "{SCOAL}: a framework for simultaneous co-clustering
                 and learning from complex data",
  journal =      j-TKDD,
  volume =       "4",
  number =       "3",
  pages =        "11:1--11:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1839490.1839492",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Mon Mar 28 11:43:57 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2010:BBI,
  author =       "Jinlin Chen and Keli Xiao",
  title =        "{BISC}: a bitmap itemset support counting approach for
                 efficient frequent itemset mining",
  journal =      j-TKDD,
  volume =       "4",
  number =       "3",
  pages =        "12:1--12:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1839490.1839493",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Mon Mar 28 11:43:57 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Becchetti:2010:EAL,
  author =       "Luca Becchetti and Paolo Boldi and Carlos Castillo and
                 Aristides Gionis",
  title =        "Efficient algorithms for large-scale local triangle
                 counting",
  journal =      j-TKDD,
  volume =       "4",
  number =       "3",
  pages =        "13:1--13:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1839490.1839494",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Mon Mar 28 11:43:57 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2010:MDR,
  author =       "Yin Zhang and Zhi-Hua Zhou",
  title =        "Multilabel dimensionality reduction via dependence
                 maximization",
  journal =      j-TKDD,
  volume =       "4",
  number =       "3",
  pages =        "14:1--14:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1839490.1839495",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Mon Mar 28 11:43:57 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Cui:2010:LMN,
  author =       "Ying Cui and Xiaoli Z. Fern and Jennifer G. Dy",
  title =        "Learning multiple nonredundant clusterings",
  journal =      j-TKDD,
  volume =       "4",
  number =       "3",
  pages =        "15:1--15:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1839490.1839496",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Mon Mar 28 11:43:57 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2010:TSI,
  author =       "Wei Wang",
  title =        "{TKDD} Special Issue: {SIGKDD 2009}",
  journal =      j-TKDD,
  volume =       "4",
  number =       "4",
  pages =        "16:1--16:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1857947.1857948",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Mon Mar 28 11:43:58 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2010:BTA,
  author =       "Ye Chen and Dmitry Pavlov and John F. Canny",
  title =        "Behavioral Targeting: The Art of Scaling Up Simple
                 Algorithms",
  journal =      j-TKDD,
  volume =       "4",
  number =       "4",
  pages =        "17:1--17:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1857947.1857949",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  bibdate =      "Mon Mar 28 11:43:58 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Mohammed:2010:CDA,
  author =       "Noman Mohammed and Benjamin C. M. Fung and Patrick C.
                 K. Hung and Cheuk-Kwong Lee",
  title =        "Centralized and Distributed Anonymization for
                 High-Dimensional Healthcare Data",
  journal =      j-TKDD,
  volume =       "4",
  number =       "4",
  pages =        "18:1--18:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1857947.1857950",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:43:58 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2010:BBM,
  author =       "Chao Liu and Fan Guo and Christos Faloutsos",
  title =        "{Bayesian} Browsing Model: Exact Inference of Document
                 Relevance from Petabyte-Scale Data",
  journal =      j-TKDD,
  volume =       "4",
  number =       "4",
  pages =        "19:1--19:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1857947.1857951",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:43:58 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wu:2010:MAF,
  author =       "Mingxi Wu and Chris Jermaine and Sanjay Ranka and
                 Xiuyao Song and John Gums",
  title =        "A Model-Agnostic Framework for Fast Spatial Anomaly
                 Detection",
  journal =      j-TKDD,
  volume =       "4",
  number =       "4",
  pages =        "20:1--20:??",
  month =        oct,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1857947.1857952",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:43:58 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhong:2010:ATS,
  author =       "Ning Zhong and Gregory Piatetsky-Shapiro and Yiyu Yao
                 and Philip S. Yu",
  title =        "{ACM TKDD} Special Issue on Knowledge Discovery for
                 {Web} Intelligence",
  journal =      j-TKDD,
  volume =       "5",
  number =       "1",
  pages =        "1:1--1:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870096.1870097",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:43:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Tang:2010:CAW,
  author =       "Jie Tang and Limin Yao and Duo Zhang and Jing Zhang",
  title =        "A Combination Approach to {Web} User Profiling",
  journal =      j-TKDD,
  volume =       "5",
  number =       "1",
  pages =        "2:1--2:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870096.1870098",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:43:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bouguessa:2010:DKS,
  author =       "Mohamed Bouguessa and Shengrui Wang and Benoit
                 Dumoulin",
  title =        "Discovering Knowledge-Sharing Communities in
                 Question-Answering Forums",
  journal =      j-TKDD,
  volume =       "5",
  number =       "1",
  pages =        "3:1--3:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870096.1870099",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:43:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Plangprasopchok:2010:MSA,
  author =       "Anon Plangprasopchok and Kristina Lerman",
  title =        "Modeling Social Annotation: a {Bayesian} Approach",
  journal =      j-TKDD,
  volume =       "5",
  number =       "1",
  pages =        "4:1--4:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870096.1870100",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:43:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Sakurai:2010:FDG,
  author =       "Yasushi Sakurai and Christos Faloutsos and Spiros
                 Papadimitriou",
  title =        "Fast Discovery of Group Lag Correlations in Streams",
  journal =      j-TKDD,
  volume =       "5",
  number =       "1",
  pages =        "5:1--5:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870096.1870101",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:43:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2010:FCP,
  author =       "Kun Liu and Evimaria Terzi",
  title =        "A Framework for Computing the Privacy Scores of Users
                 in Online Social Networks",
  journal =      j-TKDD,
  volume =       "5",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2010",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1870096.1870102",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:43:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Sun:2011:ISI,
  author =       "Jimeng Sun and Yan Liu and Jie Tang and Chid Apte",
  title =        "Introduction to Special Issue on Large-Scale Data
                 Mining",
  journal =      j-TKDD,
  volume =       "5",
  number =       "2",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921632.1921633",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:44:01 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Kang:2011:HMR,
  author =       "U. Kang and Charalampos E. Tsourakakis and Ana Paula
                 Appel and Christos Faloutsos and Jure Leskovec",
  title =        "{HADI}: Mining Radii of Large Graphs",
  journal =      j-TKDD,
  volume =       "5",
  number =       "2",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921632.1921634",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:44:01 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{deVries:2011:RRL,
  author =       "Timothy de Vries and Hui Ke and Sanjay Chawla and
                 Peter Christen",
  title =        "Robust Record Linkage Blocking Using Suffix Arrays and
                 {Bloom} Filters",
  journal =      j-TKDD,
  volume =       "5",
  number =       "2",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921632.1921635",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:44:01 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Dunlavy:2011:TLP,
  author =       "Daniel M. Dunlavy and Tamara G. Kolda and Evrim Acar",
  title =        "Temporal Link Prediction Using Matrix and Tensor
                 Factorizations",
  journal =      j-TKDD,
  volume =       "5",
  number =       "2",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921632.1921636",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:44:01 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Magdalinos:2011:ECQ,
  author =       "Panagis Magdalinos and Christos Doulkeridis and
                 Michalis Vazirgiannis",
  title =        "Enhancing Clustering Quality through Landmark-Based
                 Dimensionality Reduction",
  journal =      j-TKDD,
  volume =       "5",
  number =       "2",
  pages =        "11:1--11:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921632.1921637",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:44:01 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Cheng:2011:CLA,
  author =       "Hong Cheng and Yang Zhou and Jeffrey Xu Yu",
  title =        "Clustering Large Attributed Graphs: a Balance between
                 Structural and Attribute Similarities",
  journal =      j-TKDD,
  volume =       "5",
  number =       "2",
  pages =        "12:1--12:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921632.1921638",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:44:01 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Menon:2011:FAA,
  author =       "Aditya Krishna Menon and Charles Elkan",
  title =        "Fast Algorithms for Approximating the Singular Value
                 Decomposition",
  journal =      j-TKDD,
  volume =       "5",
  number =       "2",
  pages =        "13:1--13:??",
  month =        feb,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1921632.1921639",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Mar 28 11:44:01 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "A low-rank approximation to a matrix $A$ is a matrix
                 with significantly smaller rank than $A$, and which is
                 close to $A$ according to some norm. Many practical
                 applications involving the use of large matrices focus
                 on low-rank approximations. By reducing the rank or
                 dimensionality of the data, we reduce the complexity of
                 analyzing the data. The singular value decomposition is
                 the most popular low-rank matrix approximation.
                 However, due to its expensive computational
                 requirements, it has often been considered intractable
                 for practical applications involving massive data.
                 Recent developments have tried to address this problem,
                 with several methods proposed to approximate the
                 decomposition with better asymptotic runtime. We
                 present an empirical study of these techniques on a
                 variety of dense and sparse datasets. We find that a
                 sampling approach of Drineas, Kannan and Mahoney is
                 often, but not always, the best performing method. This
                 method gives solutions with high accuracy much faster
                 than classical SVD algorithms, on large sparse datasets
                 in particular. Other modern methods, such as a recent
                 algorithm by Rokhlin and Tygert, also offer savings
                 compared to classical SVD algorithms. The older
                 sampling methods of Achlioptas and McSherry are shown
                 to sometimes take longer than classical SVD.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2011:IDC,
  author =       "Dingding Wang and Shenghuo Zhu and Tao Li and Yun Chi
                 and Yihong Gong",
  title =        "Integrating Document Clustering and Multidocument
                 Summarization",
  journal =      j-TKDD,
  volume =       "5",
  number =       "3",
  pages =        "14:1--14:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1993077.1993078",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Aug 18 13:28:08 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Maier:2011:INS,
  author =       "Marc Maier and Matthew Rattigan and David Jensen",
  title =        "Indexing Network Structure with Shortest-Path Trees",
  journal =      j-TKDD,
  volume =       "5",
  number =       "3",
  pages =        "15:1--15:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1993077.1993079",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Aug 18 13:28:08 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wong:2011:CUA,
  author =       "Raymond Chi-Wing Wong and Ada Wai-Chee Fu and Ke Wang
                 and Philip S. Yu and Jian Pei",
  title =        "Can the Utility of Anonymized Data be Used for Privacy
                 Breaches?",
  journal =      j-TKDD,
  volume =       "5",
  number =       "3",
  pages =        "16:1--16:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1993077.1993080",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Aug 18 13:28:08 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Lin:2011:CDM,
  author =       "Yu-Ru Lin and Jimeng Sun and Hari Sundaram and Aisling
                 Kelliher and Paul Castro and Ravi Konuru",
  title =        "Community Discovery via Metagraph Factorization",
  journal =      j-TKDD,
  volume =       "5",
  number =       "3",
  pages =        "17:1--17:??",
  month =        aug,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1993077.1993081",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Aug 18 13:28:08 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Elkan:2012:GES,
  author          = {Charles Elkan and Yehuda Koren},
  title           = {Guest Editorial for Special Issue {KDD'10}},
  journal         = j-TKDD,
  volume          = {5},
  number          = {4},
  pages           = {18:1--18:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2086737.2086738},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Mar 16 15:19:57 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {18},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Iwata:2012:SMT,
  author          = {Tomoharu Iwata and Takeshi Yamada and Yasushi Sakurai
                     and Naonori Ueda},
  title           = {Sequential Modeling of Topic Dynamics with Multiple
                     Timescales},
  journal         = j-TKDD,
  volume          = {5},
  number          = {4},
  pages           = {19:1--19:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2086737.2086739},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Mar 16 15:19:57 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We propose an online topic model for sequentially
                     analyzing the time evolution of topics in document
                     collections. Topics naturally evolve with multiple
                     timescales. For example, some words may be used
                     consistently over one hundred years, while other words
                     emerge and disappear over periods of a few days. Thus,
                     in the proposed model, current topic-specific
                     distributions over words are assumed to be generated
                     based on the multiscale word distributions of the
                     previous epoch. Considering both the long- and
                     short-timescale dependency yields a more robust model.
                     We derive efficient online inference procedures based
                     on a stochastic EM algorithm, in which the model is
                     sequentially updated using newly obtained data; this
                     means that past data are not required to make the
                     inference. We demonstrate the effectiveness of the
                     proposed method in terms of predictive performance and
                     computational efficiency by examining collections of
                     real documents with timestamps.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {19},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Huh:2012:DTM,
  author          = {Seungil Huh and Stephen E. Fienberg},
  title           = {Discriminative Topic Modeling Based on Manifold
                     Learning},
  journal         = j-TKDD,
  volume          = {5},
  number          = {4},
  pages           = {20:1--20:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2086737.2086740},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Mar 16 15:19:57 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Topic modeling has become a popular method used for
                     data analysis in various domains including text
                     documents. Previous topic model approaches, such as
                     probabilistic Latent Semantic Analysis (pLSA) and
                     Latent Dirichlet Allocation (LDA), have shown
                     impressive success in discovering low-rank hidden
                     structures for modeling text documents. These
                     approaches, however do not take into account the
                     manifold structure of the data, which is generally
                     informative for nonlinear dimensionality reduction
                     mapping. More recent topic model approaches, Laplacian
                     PLSI (LapPLSI) and Locally-consistent Topic Model
                     (LTM), have incorporated the local manifold structure
                     into topic models and have shown resulting benefits.
                     But they fall short of achieving full discriminating
                     power of manifold learning as they only enhance the
                     proximity between the low-rank representations of
                     neighboring pairs without any consideration for
                     non-neighboring pairs. In this article, we propose a
                     new approach, Discriminative Topic Model (DTM), which
                     separates non-neighboring pairs from each other in
                     addition to bringing neighboring pairs closer together,
                     thereby preserving the global manifold structure as
                     well as improving local consistency. We also present a
                     novel model-fitting algorithm based on the generalized
                     EM algorithm and the concept of Pareto improvement. We
                     empirically demonstrate the success of DTM in terms of
                     unsupervised clustering and semisupervised
                     classification accuracies on text corpora and
                     robustness to parameters compared to state-of-the-art
                     techniques.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {20},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Gomez-Rodriguez:2012:IND,
  author          = {Manuel Gomez-Rodriguez and Jure Leskovec and Andreas
                     Krause},
  title           = {Inferring Networks of Diffusion and Influence},
  journal         = j-TKDD,
  volume          = {5},
  number          = {4},
  pages           = {21:1--21:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2086737.2086741},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Mar 16 15:19:57 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Information diffusion and virus propagation are
                     fundamental processes taking place in networks. While
                     it is often possible to directly observe when nodes
                     become infected with a virus or publish the
                     information, observing individual transmissions (who
                     infects whom, or who influences whom) is typically very
                     difficult. Furthermore, in many applications, the
                     underlying network over which the diffusions and
                     propagations spread is actually unobserved. We tackle
                     these challenges by developing a method for tracing
                     paths of diffusion and influence through networks and
                     inferring the networks over which contagions propagate.
                     Given the times when nodes adopt pieces of information
                     or become infected, we identify the optimal network
                     that best explains the observed infection times. Since
                     the optimization problem is NP-hard to solve exactly,
                     we develop an efficient approximation algorithm that
                     scales to large datasets and finds provably
                     near-optimal networks. We demonstrate the effectiveness
                     of our approach by tracing information diffusion in a
                     set of 170 million blogs and news articles over a one
                     year period to infer how information flows through the
                     online media space. We find that the diffusion network
                     of news for the top 1,000 media sites and blogs tends
                     to have a core-periphery structure with a small set of
                     core media sites that diffuse information to the rest
                     of the Web. These sites tend to have stable circles of
                     influence with more general news media sites acting as
                     connectors between them.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {21},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Chen:2012:LIS,
  author          = {Jianhui Chen and Ji Liu and Jieping Ye},
  title           = {Learning Incoherent Sparse and Low-Rank Patterns from
                     Multiple Tasks},
  journal         = j-TKDD,
  volume          = {5},
  number          = {4},
  pages           = {22:1--22:??},
  month           = feb,
  year            = {2012},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2086737.2086742},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Mar 16 15:19:57 MDT 2012},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We consider the problem of learning incoherent sparse
                     and low-rank patterns from multiple tasks. Our approach
                     is based on a linear multitask learning formulation, in
                     which the sparse and low-rank patterns are induced by a
                     cardinality regularization term and a low-rank
                     constraint, respectively. This formulation is
                     nonconvex; we convert it into its convex surrogate,
                     which can be routinely solved via semidefinite
                     programming for small-size problems. We propose
                     employing the general projected gradient scheme to
                     efficiently solve such a convex surrogate; however, in
                     the optimization formulation, the objective function is
                     nondifferentiable and the feasible domain is
                     nontrivial. We present the procedures for computing the
                     projected gradient and ensuring the global convergence
                     of the projected gradient scheme. The computation of
                     the projected gradient involves a constrained
                     optimization problem; we show that the optimal solution
                     to such a problem can be obtained via solving an
                     unconstrained optimization subproblem and a Euclidean
                     projection subproblem. We also present two projected
                     gradient algorithms and analyze their rates of
                     convergence in detail. In addition, we illustrate the
                     use of the presented projected gradient algorithms for
                     the proposed multitask learning formulation using the
                     least squares loss. Experimental results on a
                     collection of real-world data sets demonstrate the
                     effectiveness of the proposed multitask learning
                     formulation and the efficiency of the proposed
                     projected gradient algorithms.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {22},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Yu:2012:LLC,
  author =       "Hsiang-Fu Yu and Cho-Jui Hsieh and Kai-Wei Chang and
                 Chih-Jen Lin",
  title =        "Large Linear Classification When Data Cannot Fit in
                 Memory",
  journal =      j-TKDD,
  volume =       "5",
  number =       "4",
  pages =        "23:1--23:??",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2086737.2086743",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 16 15:19:57 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Recent advances in linear classification have shown
                 that for applications such as document classification,
                 the training process can be extremely efficient.
                 However, most of the existing training methods are
                 designed by assuming that data can be stored in the
                 computer memory. These methods cannot be easily applied
                 to data larger than the memory capacity due to the
                 random access to the disk. We propose and analyze a
                 block minimization framework for data larger than the
                 memory size. At each step a block of data is loaded
                 from the disk and handled by certain learning methods.
                 We investigate two implementations of the proposed
                 framework for primal and dual SVMs, respectively.
                 Because data cannot fit in memory, many design
                 considerations are very different from those for
                 traditional algorithms. We discuss and compare with
                 existing approaches that are able to handle data larger
                 than memory. Experiments using data sets 20 times
                 larger than the memory demonstrate the effectiveness of
                 the proposed method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "23",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Shahaf:2012:CTL,
  author =       "Dafna Shahaf and Carlos Guestrin",
  title =        "Connecting Two (or Less) Dots: Discovering Structure
                 in News Articles",
  journal =      j-TKDD,
  volume =       "5",
  number =       "4",
  pages =        "24:1--24:??",
  month =        feb,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2086737.2086744",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 16 15:19:57 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Finding information is becoming a major part of our
                 daily life. Entire sectors, from Web users to
                 scientists and intelligence analysts, are increasingly
                 struggling to keep up with the larger and larger
                 amounts of content published every day. With this much
                 data, it is often easy to miss the big picture. In this
                 article, we investigate methods for automatically
                 connecting the dots---providing a structured, easy way
                 to navigate within a new topic and discover hidden
                 connections. We focus on the news domain: given two
                 news articles, our system automatically finds a
                 coherent chain linking them together. For example, it
                 can recover the chain of events starting with the
                 decline of home prices (January 2007), and ending with
                 the health care debate (2009). We formalize the
                 characteristics of a good chain and provide a fast
                 search-driven algorithm to connect two fixed endpoints.
                 We incorporate user feedback into our framework,
                 allowing the stories to be refined and personalized. We
                 also provide a method to handle partially-specified
                 endpoints, for users who do not know both ends of a
                 story. Finally, we evaluate our algorithm over real
                 news data. Our user studies demonstrate that the
                 objective we propose captures the users' intuitive
                 notion of coherence, and that our algorithm effectively
                 helps users understand the news.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "24",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ienco:2012:CDL,
  author =       "Dino Ienco and Ruggero G. Pensa and Rosa Meo",
  title =        "From Context to Distance: Learning Dissimilarity for
                 Categorical Data Clustering",
  journal =      j-TKDD,
  volume =       "6",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2133360.2133361",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Clustering data described by categorical attributes is
                 a challenging task in data mining applications. Unlike
                 numerical attributes, it is difficult to define a
                 distance between pairs of values of a categorical
                 attribute, since the values are not ordered. In this
                 article, we propose a framework to learn a
                 context-based distance for categorical attributes. The
                 key intuition of this work is that the distance between
                 two values of a categorical attribute $A_i$ can be
                 determined by the way in which the values of the other
                 attributes $A_j$ are distributed in the dataset
                 objects: if they are similarly distributed in the
                 groups of objects in correspondence of the distinct
                 values of $A_i$ a low value of distance is obtained. We
                 propose also a solution to the critical point of the
                 choice of the attributes $A_j$. We validate our
                 approach by embedding our distance learning framework
                 in a hierarchical clustering algorithm. We applied it
                 on various real world and synthetic datasets, both low
                 and high-dimensional. Experimental results show that
                 our method is competitive with respect to the state of
                 the art of categorical data clustering approaches. We
                 also show that our approach is scalable and has a low
                 impact on the overall computational time of a
                 clustering task.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2012:EMG,
  author =       "Chun Li and Qingyan Yang and Jianyong Wang and Ming
                 Li",
  title =        "Efficient Mining of Gap-Constrained Subsequences and
                 Its Various Applications",
  journal =      j-TKDD,
  volume =       "6",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2133360.2133362",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Mining frequent subsequence patterns is a typical
                 data-mining problem and various efficient sequential
                 pattern mining algorithms have been proposed. In many
                 application domains (e.g., biology), the frequent
                 subsequences confined by the predefined gap
                 requirements are more meaningful than the general
                 sequential patterns. In this article, we propose two
                 algorithms, Gap-BIDE for mining closed gap-constrained
                 subsequences from a set of input sequences, and
                 Gap-Connect for mining repetitive gap-constrained
                 subsequences from a single input sequence. Inspired by
                 some state-of-the-art closed or constrained sequential
                 pattern mining algorithms, the Gap-BIDE algorithm
                 adopts an efficient approach to finding the complete
                 set of closed sequential patterns with gap constraints,
                 while the Gap-Connect algorithm efficiently mines an
                 approximate set of long patterns by connecting short
                 patterns. We also present several methods for feature
                 selection from the set of gap-constrained patterns for
                 the purpose of classification and clustering. Our
                 extensive performance study shows that our approaches
                 are very efficient in mining frequent subsequences with
                 gap constraints, and the gap-constrained pattern based
                 classification/clustering approaches can achieve
                 high-quality results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2012:IBA,
  author =       "Fei Tony Liu and Kai Ming Ting and Zhi-Hua Zhou",
  title =        "Isolation-Based Anomaly Detection",
  journal =      j-TKDD,
  volume =       "6",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2133360.2133363",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Anomalies are data points that are few and different.
                 As a result of these properties, we show that,
                 anomalies are susceptible to a mechanism called
                 isolation. This article proposes a method called
                 Isolation Forest ($i$Forest), which detects anomalies
                 purely based on the concept of isolation without
                 employing any distance or density
                 measure---fundamentally different from all existing
                 methods. As a result, $i$Forest is able to exploit
                 subsampling (i) to achieve a low linear time-complexity
                 and a small memory-requirement and (ii) to deal with
                 the effects of swamping and masking effectively. Our
                 empirical evaluation shows that $i$Forest outperforms
                 ORCA, one-class SVM, LOF and Random Forests in terms of
                 AUC, processing time, and it is robust against masking
                 and swamping effects. $i$Forest also works well in
                 high dimensional problems containing a large number of
                 irrelevant attributes, and when anomalies are not
                 available in training sample.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jin:2012:MML,
  author =       "Yu Jin and Nick Duffield and Jeffrey Erman and Patrick
                 Haffner and Subhabrata Sen and Zhi-Li Zhang",
  title =        "A Modular Machine Learning System for Flow-Level
                 Traffic Classification in Large Networks",
  journal =      j-TKDD,
  volume =       "6",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2133360.2133364",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The ability to accurately and scalably classify
                 network traffic is of critical importance to a wide
                 range of management tasks of large networks, such as
                 tier-1 ISP networks and global enterprise networks.
                 Guided by the practical constraints and requirements of
                 traffic classification in large networks, in this
                 article, we explore the design of an accurate and
                 scalable machine learning based flow-level traffic
                 classification system, which is trained on a dataset of
                 flow-level data that has been annotated with
                 application protocol labels by a packet-level
                 classifier. Our system employs a lightweight modular
                 architecture, which combines a series of simple linear
                 binary classifiers, each of which can be efficiently
                 implemented and trained on vast amounts of flow data in
                 parallel, and embraces three key innovative mechanisms,
                 weighted threshold sampling, logistic calibration, and
                 intelligent data partitioning, to achieve scalability
                 while attaining high accuracy. Evaluations using real
                 traffic data from multiple locations in a large ISP
                 show that our system accurately reproduces the labels
                 of the packet level classifier when runs on (unlabeled)
                 flow records, while meeting the scalability and
                 stability requirements of large ISP networks. Using
                 training and test datasets that are two months apart
                 and collected from two different locations, the flow
                 error rates are only 3\% for TCP flows and 0.4\% for
                 UDP flows. We further show that such error rates can be
                 reduced by combining the information of spatial
                 distributions of flows, or collective traffic
                 statistics, during classification. We propose a novel
                 two-step model, which seamlessly integrates these
                 collective traffic statistics into the existing traffic
                 classification system. Experimental results display
                 performance improvement on all traffic classes and an
                 overall error rate reduction by 15\%. In addition to a
                 high accuracy, at runtime, our implementation easily
                 scales to classify traffic on 10Gbps links.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Mavroeidis:2012:SSF,
  author =       "Dimitrios Mavroeidis and Panagis Magdalinos",
  title =        "A Sequential Sampling Framework for Spectral $k$-Means
                 Based on Efficient Bootstrap Accuracy Estimations:
                 Application to Distributed Clustering",
  journal =      j-TKDD,
  volume =       "6",
  number =       "2",
  pages =        "5:1--5:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2297456.2297457",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The scalability of learning algorithms has always been
                 a central concern for data mining researchers, and
                 nowadays, with the rapid increase in data storage
                 capacities and availability, its importance has
                 increased. To this end, sampling has been studied by
                 several researchers in an effort to derive sufficiently
                 accurate models using only small data fractions. In
                 this article we focus on spectral $k$-means, that is,
                 the $k$-means approximation as derived by the spectral
                 relaxation, and propose a sequential sampling framework
                 that iteratively enlarges the sample size until the
                 $k$-means results (objective function and cluster
                 structure) become indistinguishable from the asymptotic
                 (infinite-data) output. In the proposed framework we
                 adopt a commonly applied principle in data mining
                 research that considers the use of minimal assumptions
                 concerning the data generating distribution. This
                 restriction imposes several challenges, mainly related
                 to the efficiency of the sequential sampling procedure.
                 These challenges are addressed using elements of matrix
                 perturbation theory and statistics. Moreover, although
                 the main focus is on spectral $k$-means, we also
                 demonstrate that the proposed framework can be
                 generalized to handle spectral clustering. The proposed
                 sequential sampling framework is consecutively employed
                 for addressing the distributed clustering problem,
                 where the task is to construct a global model for data
                 that resides in distributed network nodes. The main
                 challenge in this context is related to the bandwidth
                 constraints that are commonly imposed, thus requiring
                 that the distributed clustering algorithm consumes a
                 minimal amount of network load. This illustrates the
                 applicability of the proposed approach, as it enables
                 the determination of a minimal sample size that can be
                 used for constructing an accurate clustering model that
                 entails the distributional characteristics of the data.
                 As opposed to the relevant distributed $k$-means
                 approaches, our framework takes into account the fact
                 that the choice of the number of clusters has a crucial
                 effect on the required amount of communication. More
                 precisely, the proposed algorithm is able to derive a
                 statistical estimation of the required relative sizes
                 for all possible values of $k$. This unique feature of
                 our distributed clustering framework enables a network
                 administrator to choose an economic solution that
                 identifies the crude cluster structure of a dataset and
                 not devote excessive network resources for identifying
                 all the ``correct'' detailed clusters.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Das:2012:MIG,
  author =       "Sanmay Das and Malik Magdon-Ismail",
  title =        "A Model for Information Growth in Collective Wisdom
                 Processes",
  journal =      j-TKDD,
  volume =       "6",
  number =       "2",
  pages =        "6:1--6:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2297456.2297458",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Collaborative media such as wikis have become
                 enormously successful venues for information creation.
                 Articles accrue information through the asynchronous
                 editing of users who arrive both seeking information
                 and possibly able to contribute information. Most
                 articles stabilize to high-quality, trusted sources of
                 information representing the collective wisdom of all
                 the users who edited the article. We propose a model
                 for information growth which relies on two main
                 observations: (i) as an article's quality improves, it
                 attracts visitors at a faster rate (a rich-get-richer
                 phenomenon); and, simultaneously, (ii) the chances that
                 a new visitor will improve the article drops (there is
                 only so much that can be said about a particular
                 topic). Our model is able to reproduce many features of
                 the edit dynamics observed on Wikipedia; in particular,
                 it captures the observed rise in the edit rate,
                 followed by $ 1 / t $ decay. Despite differences in the
                 media, we also document similar features in the comment
                 rates for a segment of the LiveJournal blogosphere.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xu:2012:GME,
  author =       "Tianbing Xu and Zhongfei Zhang and Philip S. Yu and Bo
                 Long",
  title =        "Generative Models for Evolutionary Clustering",
  journal =      j-TKDD,
  volume =       "6",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2297456.2297459",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This article studies evolutionary clustering, a
                 recently emerged hot topic with many important
                 applications, noticeably in dynamic social network
                 analysis. In this article, based on the recent
                 literature on nonparametric Bayesian models, we have
                 developed two generative models: DPChain and HDP-HTM.
                 DPChain is derived from the Dirichlet process mixture
                 (DPM) model, with an exponential decaying component
                 along with the time. HDP-HTM combines the hierarchical
                 Dirichlet process (HDP) with a hierarchical transition
                 matrix (HTM) based on the proposed Infinite
                 hierarchical Markov state model (iHMS). Both models
                 substantially advance the literature on evolutionary
                 clustering, in the sense that not only do they both
                 perform better than those in the existing literature,
                 but more importantly, they are capable of automatically
                 learning the cluster numbers and explicitly addressing
                 the corresponding issues. Extensive evaluations have
                 demonstrated the effectiveness and the promise of these
                 two solutions compared to the state-of-the-art
                 literature.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2012:LME,
  author =       "Shaojun Wang and Dale Schuurmans and Yunxin Zhao",
  title =        "The Latent Maximum Entropy Principle",
  journal =      j-TKDD,
  volume =       "6",
  number =       "2",
  pages =        "8:1--8:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2297456.2297460",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We present an extension to Jaynes' maximum entropy
                 principle that incorporates latent variables. The
                 principle of latent maximum entropy we propose is
                 different from both Jaynes' maximum entropy principle
                 and maximum likelihood estimation, but can yield better
                 estimates in the presence of hidden variables and
                 limited training data. We first show that solving for a
                 latent maximum entropy model poses a hard nonlinear
                 constrained optimization problem in general. However,
                 we then show that feasible solutions to this problem
                 can be obtained efficiently for the special case of
                 log-linear models---which forms the basis for an
                 efficient approximation to the latent maximum entropy
                 principle. We derive an algorithm that combines
                 expectation-maximization with iterative scaling to
                 produce feasible log-linear solutions. This algorithm
                 can be interpreted as an alternating minimization
                 algorithm in the information divergence, and reveals an
                 intimate connection between the latent maximum entropy
                 and maximum likelihood principles. To select a final
                 model, we generate a series of feasible candidates,
                 calculate the entropy of each, and choose the model
                 that attains the highest entropy. Our experimental
                 results show that estimation based on the latent
                 maximum entropy principle generally gives better
                 results than maximum likelihood when estimating latent
                 variable models on small observed data samples.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bhattacharya:2012:CGC,
  author =       "Indrajit Bhattacharya and Shantanu Godbole and
                 Sachindra Joshi and Ashish Verma",
  title =        "Cross-Guided Clustering: Transfer of Relevant
                 Supervision across Tasks",
  journal =      j-TKDD,
  volume =       "6",
  number =       "2",
  pages =        "9:1--9:??",
  month =        jul,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2297456.2297461",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Lack of supervision in clustering algorithms often
                 leads to clusters that are not useful or interesting to
                 human reviewers. We investigate if supervision can be
                 automatically transferred for clustering a target task,
                 by providing a relevant supervised partitioning of a
                 dataset from a different source task. The target
                 clustering is made more meaningful for the human user
                 by trading-off intrinsic clustering goodness on the
                 target task for alignment with relevant supervised
                 partitions in the source task, wherever possible. We
                 propose a cross-guided clustering algorithm that builds
                 on traditional $k$-means by aligning the target clusters
                 with source partitions. The alignment process makes use
                 of a cross-task similarity measure that discovers
                 hidden relationships across tasks. When the source and
                 target tasks correspond to different domains with
                 potentially different vocabularies, we propose a
                 projection approach using pivot vocabularies for the
                 cross-domain similarity measure. Using multiple
                 real-world and synthetic datasets, we show that our
                 approach improves clustering accuracy significantly
                 over traditional $k$-means and state-of-the-art
                 semi-supervised clustering baselines, over a wide range
                 of data characteristics and parameter settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2012:LBN,
  author =       "Zhenxing Wang and Laiwan Chan",
  title =        "Learning {Bayesian} networks from {Markov} random
                 fields: an efficient algorithm for linear models",
  journal =      j-TKDD,
  volume =       "6",
  number =       "3",
  pages =        "10:1--10:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362383.2362384",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:40 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Dependency analysis is a typical approach for Bayesian
                 network learning, which infers the structures of
                 Bayesian networks by the results of a series of
                 conditional independence (CI) tests. In practice,
                 testing independence conditioning on large sets hampers
                 the performance of dependency analysis algorithms in
                 terms of accuracy and running time for the following
                 reasons. First, testing independence on large sets of
                 variables with limited samples is not stable. Second,
                 for most dependency analysis algorithms, the number of
                 CI tests grows at an exponential rate with the sizes of
                 conditioning sets, and the running time grows of the
                 same rate. Therefore, determining how to reduce the
                 number of CI tests and the sizes of conditioning sets
                 becomes a critical step in dependency analysis
                 algorithms. In this article, we address a two-phase
                 algorithm based on the observation that the structures
                 of Markov random fields are similar to those of
                 Bayesian networks. The first phase of the algorithm
                 constructs a Markov random field from data, which
                 provides a close approximation to the structure of the
                 true Bayesian network; the second phase of the
                 algorithm removes redundant edges according to CI tests
                 to get the true Bayesian network. Both phases use
                 Markov blanket information to reduce the sizes of
                 conditioning sets and the number of CI tests without
                 sacrificing accuracy. An empirical study shows that the
                 two-phase algorithm performs well in terms of accuracy
                 and efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chan:2012:CID,
  author =       "Jeffrey Chan and James Bailey and Christopher Leckie
                 and Michael Houle",
  title =        "{ciForager}: Incrementally discovering regions of
                 correlated change in evolving graphs",
  journal =      j-TKDD,
  volume =       "6",
  number =       "3",
  pages =        "11:1--11:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362383.2362385",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:40 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Data mining techniques for understanding how graphs
                 evolve over time have become increasingly important.
                 Evolving graphs arise naturally in diverse applications
                 such as computer network topologies, multiplayer games
                 and medical imaging. A natural and interesting problem
                 in evolving graph analysis is the discovery of compact
                 subgraphs that change in a similar manner. Such
                 subgraphs are known as regions of correlated change and
                 they can both summarise change patterns in graphs and
                 help identify the underlying events causing these
                 changes. However, previous techniques for discovering
                 regions of correlated change suffer from limited
                 scalability, making them unsuitable for analysing the
                 evolution of very large graphs. In this paper, we
                 introduce a new algorithm called ciForager, that
                 addresses this scalability challenge and offers
                 considerable improvements. The efficiency of ciForager
                 is based on the use of new incremental techniques for
                 detecting change, as well as the use of Voronoi
                 representations for efficiently determining distance.
                 We experimentally show that ciForager can achieve
                 speedups of up to 1000 times over previous approaches.
                 As a result, it becomes feasible for the first time to
                 discover regions of correlated change in extremely
                 large graphs, such as the entire BGP routing topology
                 of the Internet.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2012:CDS,
  author =       "Dingding Wang and Shenghuo Zhu and Tao Li and Yihong
                 Gong",
  title =        "Comparative document summarization via discriminative
                 sentence selection",
  journal =      j-TKDD,
  volume =       "6",
  number =       "3",
  pages =        "12:1--12:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362383.2362386",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:40 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a collection of document groups, a natural
                 question is to identify the differences among them.
                 Although traditional document summarization techniques
                 can summarize the content of the document groups one by
                 one, there exists a great necessity to generate a
                 summary of the differences among the document groups.
                 In this article, we study a novel problem, that of
                 summarizing the differences between document groups. A
                 discriminative sentence selection method is proposed to
                 extract the most discriminative sentences which
                 represent the specific characteristics of each document
                 group. Experiments and case studies on real-world data
                 sets demonstrate the effectiveness of our proposed
                 method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{deMelo:2012:FNO,
  author =       "Pedro O. S. {Vaz de Melo} and Virgilio A. F. Almeida
                 and Antonio A. F. Loureiro and Christos Faloutsos",
  title =        "Forecasting in the {NBA} and other team sports:
                 Network effects in action",
  journal =      j-TKDD,
  volume =       "6",
  number =       "3",
  pages =        "13:1--13:??",
  month =        oct,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2362383.2362387",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Nov 6 18:30:40 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The multi-million sports-betting market is based on
                 the fact that the task of predicting the outcome of a
                 sports event is very hard. Even with the aid of an
                 uncountable number of descriptive statistics and
                 background information, only a few can correctly guess
                 the outcome of a game or a league. In this work, our
                 approach is to move away from the traditional way of
                 predicting sports events, and instead to model sports
                 leagues as networks of players and teams where the only
                 information available is the work relationships among
                 them. We propose two network-based models to predict
                 the behavior of teams in sports leagues. These models
                 are parameter-free, that is, they do not have a single
                 parameter, and moreover are sport-agnostic: they can be
                 applied directly to any team sports league. First, we
                 view a sports league as a network in evolution, and we
                 infer the implicit feedback behind network changes and
                 properties over the years. Then, we use this knowledge
                 to construct the network-based prediction models, which
                 can, with a significantly high probability, indicate
                 how well a team will perform over a season. We compare
                 our proposed models with other prediction models in two
                 of the most popular sports leagues: the National
                 Basketball Association (NBA) and the Major League
                 Baseball (MLB). Our model shows consistently good
                 results in comparison with the other models and,
                 relying upon the network properties of the teams, we
                 achieved a $ \approx 14 \% $ rank prediction accuracy
                 improvement over our best competitor.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ghosh:2012:SIB,
  author =       "Joydeep Ghosh and Padhraic Smyth and Andrew Tomkins
                 and Rich Caruana",
  title =        "Special issue on best of {SIGKDD 2011}",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "14:1--14:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382578",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Kaufman:2012:LDM,
  author =       "Shachar Kaufman and Saharon Rosset and Claudia Perlich
                 and Ori Stitelman",
  title =        "Leakage in data mining: Formulation, detection, and
                 avoidance",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "15:1--15:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382579",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Deemed ``one of the top ten data mining mistakes'',
                 leakage is the introduction of information about the
                 data mining target that should not be legitimately
                 available to mine from. In addition to our own industry
                 experience with real-life projects, controversies
                 around several major public data mining competitions
                 held recently such as the INFORMS 2010 Data Mining
                 Challenge and the IJCNN 2011 Social Network Challenge
                 are evidence that this issue is as relevant today as it
                 has ever been. While acknowledging the importance and
                 prevalence of leakage in both synthetic competitions
                 and real-life data mining projects, existing literature
                 has largely left this idea unexplored. What little has
                 been said turns out not to be broad enough to cover
                 more complex cases of leakage, such as those where the
                 classical independently and identically distributed
                 (i.i.d.) assumption is violated, that have been
                 recently documented. In our new approach, these cases
                 and others are explained by explicitly defining
                 modeling goals and analyzing the broader framework of
                 the data mining problem. The resulting definition
                 enables us to derive general methodology for dealing
                 with the issue. We show that it is possible to avoid
                 leakage with a simple specific approach to data
                 management followed by what we call a learn-predict
                 separation, and present several ways of detecting
                 leakage when the modeler has no control over how the
                 data have been collected. We also offer an alternative
                 point of view on leakage that is based on causal graph
                 modeling concepts.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Mampaey:2012:SDS,
  author =       "Michael Mampaey and Jilles Vreeken and Nikolaj Tatti",
  title =        "Summarizing data succinctly with the most informative
                 itemsets",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "16:1--16:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382580",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Knowledge discovery from data is an inherently
                 iterative process. That is, what we know about the data
                 greatly determines our expectations, and therefore,
                 what results we would find interesting and/or
                 surprising. Given new knowledge about the data, our
                 expectations will change. Hence, in order to avoid
                 redundant results, knowledge discovery algorithms
                 ideally should follow such an iterative updating
                 procedure. With this in mind, we introduce a
                 well-founded approach for succinctly summarizing data
                 with the most informative itemsets; using a
                 probabilistic maximum entropy model, we iteratively
                 find the itemset that provides us the most novel
                 information---that is, for which the frequency in the
                 data surprises us the most---and in turn we update our
                 model accordingly. As we use the maximum entropy
                 principle to obtain unbiased probabilistic models, and
                 only include those itemsets that are most informative
                 with regard to the current model, the summaries we
                 construct are guaranteed to be both descriptive and
                 nonredundant. The algorithm that we present, called
                 MTV, can either discover the top-$k$ most informative
                 itemsets, or we can employ either the Bayesian
                 Information Criterion (BIC) or the Minimum Description
                 Length (MDL) principle to automatically identify the
                 set of itemsets that together summarize the data well.
                 In other words, our method will ``tell you what you
                 need to know'' about the data. Importantly, it is a
                 one-phase algorithm: rather than picking itemsets from
                 a user-provided candidate set, itemsets and their
                 supports are mined on-the-fly. To further its
                 applicability, we provide an efficient method to
                 compute the maximum entropy distribution using Quick
                 Inclusion-Exclusion. Experiments on our method, using
                 synthetic, benchmark, and real data, show that the
                 discovered summaries are succinct, and correctly
                 identify the key patterns in the data. The models they
                 form attain high likelihoods, and inspection shows that
                 they summarize the data well with increasingly
                 specific, yet nonredundant itemsets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chu:2012:TLM,
  author =       "Shumo Chu and James Cheng",
  title =        "Triangle listing in massive networks",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382581",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Triangle listing is one of the fundamental algorithmic
                 problems whose solution has numerous applications
                 especially in the analysis of complex networks, such as
                 the computation of clustering coefficients,
                 transitivity, triangular connectivity, trusses, etc.
                 Existing algorithms for triangle listing are mainly
                 in-memory algorithms, whose performance cannot scale
                 with the massive volume of today's fast growing
                 networks. When the input graph cannot fit in main
                 memory, triangle listing requires random disk accesses
                 that can incur prohibitively huge I/O cost. Some
                 streaming, semistreaming, and sampling algorithms have
                 been proposed but these are approximation algorithms.
                 We propose an I/O-efficient algorithm for triangle
                 listing. Our algorithm is exact and avoids random disk
                 access. Our results show that our algorithm is scalable
                 and outperforms the state-of-the-art in-memory and
                 local triangle estimation algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chattopadhyay:2012:MDA,
  author =       "Rita Chattopadhyay and Qian Sun and Wei Fan and Ian
                 Davidson and Sethuraman Panchanathan and Jieping Ye",
  title =        "Multisource domain adaptation and its application to
                 early detection of fatigue",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "18:1--18:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382582",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We consider the characterization of muscle fatigue
                 through a noninvasive sensing mechanism such as Surface
                 ElectroMyoGraphy (SEMG). While changes in the
                 properties of SEMG signals with respect to muscle
                 fatigue have been reported in the literature, the large
                 variation in these signals across different individuals
                 makes the task of modeling and classification of SEMG
                 signals challenging. Indeed, the variation in SEMG
                 parameters from subject to subject creates differences
                 in the data distribution. In this article, we propose
                 two transfer learning frameworks based on the
                 multisource domain adaptation methodology for detecting
                 different stages of fatigue using SEMG signals, that
                 addresses the distribution differences. In the proposed
                 frameworks, the SEMG data of a subject represent a
                 domain; data from multiple subjects in the training set
                 form the multiple source domains and the test subject
                 data form the target domain. SEMG signals are
                 predominantly different in conditional probability
                 distribution across subjects. The key feature of the
                 first framework is a novel weighting scheme that
                 addresses the conditional probability distribution
                 differences across multiple domains (subjects) and the
                 key feature of the second framework is a two-stage
                 domain adaptation methodology which combines weighted
                 data from multiple sources based on marginal
                 probability differences (first stage) as well as
                 conditional probability differences (second stage),
                 with the target domain data. The weights for minimizing
                 the marginal probability differences are estimated
                 independently, while the weights for minimizing
                 conditional probability differences are computed
                 simultaneously by exploiting the potential interaction
                 among multiple sources. We also provide a theoretical
                 analysis on the generalization performance of the
                 proposed multisource domain adaptation formulation
                 using the weighted Rademacher complexity measure. We
                 have validated the proposed frameworks on Surface
                 ElectroMyoGram signals collected from 8 people during a
                 fatigue-causing repetitive gripping activity.
                 Comprehensive experiments on the SEMG dataset
                 demonstrate that the proposed method improves the
                 classification accuracy by 20\% to 30\% over the cases
                 without any domain adaptation method and by 13\% to
                 30\% over existing state-of-the-art domain adaptation
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wilkinson:2012:SIS,
  author =       "Leland Wilkinson and Anushka Anand and Tuan Nhon
                 Dang",
  title =        "Substantial improvements in the set-covering
                 projection classifier {CHIRP} (composite hypercubes on
                 iterated random projections)",
  journal =      j-TKDD,
  volume =       "6",
  number =       "4",
  pages =        "19:1--19:??",
  month =        dec,
  year =         "2012",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2382577.2382583",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:40 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In Wilkinson et al. [2011] we introduced a new
                 set-covering random projection classifier that achieved
                 average error lower than that of other classifiers in
                 the Weka platform. This classifier was based on an $
                 L^\infty $ norm distance function and exploited an
                 iterative sequence of three stages (projecting,
                 binning, and covering) to deal with the curse of
                 dimensionality, computational complexity, and nonlinear
                 separability. We now present substantial changes that
                 improve robustness and reduce training and testing time
                 by almost an order of magnitude without jeopardizing
                 CHIRP's outstanding error performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Angiulli:2013:NNB,
  author =       "Fabrizio Angiulli and Fabio Fassetti",
  title =        "Nearest Neighbor-Based Classification of Uncertain
                 Data",
  journal =      j-TKDD,
  volume =       "7",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435209.2435210",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:44 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This work deals with the problem of classifying
                 uncertain data. With this aim we introduce the
                 Uncertain Nearest Neighbor (UNN) rule, which represents
                 the generalization of the deterministic nearest
                 neighbor rule to the case in which uncertain objects
                 are available. The UNN rule relies on the concept of
                 nearest neighbor class, rather than on that of nearest
                 neighbor object. The nearest neighbor class of a test
                 object is the class that maximizes the probability of
                 providing its nearest neighbor. The evidence is that
                 the former concept is much more powerful than the
                 latter in the presence of uncertainty, in that it
                 correctly models the right semantics of the nearest
                 neighbor decision rule when applied to the uncertain
                 scenario. An effective and efficient algorithm to
                 perform uncertain nearest neighbor classification of a
                 generic (un)certain test object is designed, based on
                 properties that greatly reduce the temporal cost
                 associated with nearest neighbor class probability
                 computation. Experimental results are presented,
                 showing that the UNN rule is effective and efficient in
                 classifying uncertain data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2013:CDS,
  author =       "Dingding Wang and Shenghuo Zhu and Tao Li and Yihong
                 Gong",
  title =        "Comparative Document Summarization via Discriminative
                 Sentence Selection",
  journal =      j-TKDD,
  volume =       "7",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435209.2435211",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:44 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a collection of document groups, a natural
                 question is to identify the differences among these
                 groups. Although traditional document summarization
                 techniques can summarize the content of the document
                 groups one by one, there exists a great necessity to
                 generate a summary of the differences among the
                 document groups. In this article, we study a novel
                 problem of summarizing the differences between document
                 groups. A discriminative sentence selection method is
                 proposed to extract the most discriminative sentences
                 that represent the specific characteristics of each
                 document group. Experiments and case studies on
                 real-world data sets demonstrate the effectiveness of
                 our proposed method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bayati:2013:MPA,
  author =       "Mohsen Bayati and David F. Gleich and Amin Saberi and
                 Ying Wang",
  title =        "Message-Passing Algorithms for Sparse Network
                 Alignment",
  journal =      j-TKDD,
  volume =       "7",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435209.2435212",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:44 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Network alignment generalizes and unifies several
                 approaches for forming a matching or alignment between
                 the vertices of two graphs. We study a mathematical
                 programming framework for network alignment problem and
                 a sparse variation of it where only a small number of
                 matches between the vertices of the two graphs are
                 possible. We propose a new message passing algorithm
                 that allows us to compute, very efficiently,
                 approximate solutions to the sparse network alignment
                 problems with graph sizes as large as hundreds of
                 thousands of vertices. We also provide extensive
                 simulations comparing our algorithms with two of the
                 best solvers for network alignment problems on two
                 synthetic matching problems, two bioinformatics
                 problems, and three large ontology alignment problems
                 including a multilingual problem with a known labeled
                 alignment.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2013:CWM,
  author =       "Bin Li and Steven C. H. Hoi and Peilin Zhao and
                 Vivekanand Gopalkrishnan",
  title =        "Confidence Weighted Mean Reversion Strategy for Online
                 Portfolio Selection",
  journal =      j-TKDD,
  volume =       "7",
  number =       "1",
  pages =        "4:1--4:??",
  month =        mar,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2435209.2435213",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jun 24 13:02:44 MDT 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Online portfolio selection has been attracting
                 increasing attention from the data mining and machine
                 learning communities. All existing online portfolio
                 selection strategies focus on the first order
                 information of a portfolio vector, though the second
                 order information may also be beneficial to a strategy.
                 Moreover, empirical evidence shows that relative stock
                 prices may follow the mean reversion property, which
                 has not been fully exploited by existing strategies.
                 This article proposes a novel online portfolio
                 selection strategy named Confidence Weighted Mean
                 Reversion (CWMR). Inspired by the mean reversion
                 principle in finance and confidence weighted online
                 learning technique in machine learning, CWMR models the
                 portfolio vector as a Gaussian distribution, and
                 sequentially updates the distribution by following the
                 mean reversion trading principle. CWMR's closed-form
                 updates clearly reflect the mean reversion trading
                 idea. We also present several variants of CWMR
                 algorithms, including a CWMR mixture algorithm that is
                 theoretically universal. Empirically, CWMR strategy is
                 able to effectively exploit the power of mean reversion
                 for online portfolio selection. Extensive experiments
                 on various real markets show that the proposed strategy
                 is superior to the state-of-the-art techniques. The
                 experimental testbed including source codes and data
                 sets is available online.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Lou:2013:LPR,
  author =       "Tiancheng Lou and Jie Tang and John Hopcroft and
                 Zhanpeng Fang and Xiaowen Ding",
  title =        "Learning to predict reciprocity and triadic closure in
                 social networks",
  journal =      j-TKDD,
  volume =       "7",
  number =       "2",
  pages =        "5:1--5:??",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2499907.2499908",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:06 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We study how links are formed in social networks. In
                 particular, we focus on investigating how a reciprocal
                 (two-way) link, the basic relationship in social
                 networks, is developed from a parasocial (one-way)
                 relationship and how the relationships further develop
                 into triadic closure, one of the fundamental processes
                 of link formation. We first investigate how geographic
                 distance and interactions between users influence the
                 formation of link structure among users. Then we study
                 how social theories including homophily, social
                 balance, and social status are satisfied over networks
                 with parasocial and reciprocal relationships. The study
                 unveils several interesting phenomena. For example,
                 ``friend's friend is a friend'' indeed exists in the
                 reciprocal relationship network, but does not hold in
                 the parasocial relationship network. We propose a
                 learning framework to formulate the problems of
                 predicting reciprocity and triadic closure into a
                 graphical model. We demonstrate that it is possible to
                 accurately infer 90\% of reciprocal relationships in a
                 Twitter network. The proposed model also achieves
                 better performance (+20--30\% in terms of F1-measure)
                 than several alternative methods for predicting the
                 triadic closure formation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2013:EOL,
  author =       "Haiqin Yang and Michael R. Lyu and Irwin King",
  title =        "Efficient online learning for multitask feature
                 selection",
  journal =      j-TKDD,
  volume =       "7",
  number =       "2",
  pages =        "6:1--6:??",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2499907.2499909",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:06 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Learning explanatory features across multiple related
                 tasks, or MultiTask Feature Selection (MTFS), is an
                 important problem in the applications of data mining,
                 machine learning, and bioinformatics. Previous MTFS
                 methods fulfill this task by batch-mode training. This
                 makes them inefficient when data come sequentially or
                 when the number of training data is so large that they
                 cannot be loaded into the memory simultaneously. In
                 order to tackle these problems, we propose a novel
                 online learning framework to solve the MTFS problem. A
                 main advantage of the online algorithm is its
                 efficiency in both time complexity and memory cost. The
                 weights of the MTFS models at each iteration can be
                 updated by closed-form solutions based on the average
                 of previous subgradients. This yields the worst-case
                 bounds of the time complexity and memory cost at each
                 iteration, both in the order of $ O(d \times Q) $,
                 where $d$ is the number of feature dimensions and $Q$
                 is the number of tasks. Moreover, we provide
                 theoretical analysis for the average regret of the
                 online learning algorithms, which also guarantees the
                 convergence rate of the algorithms. Finally, we conduct
                 detailed experiments to show the characteristics and
                 merits of the online learning algorithms in solving
                 several MTFS problems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2013:MRL,
  author =       "Yu Zhang and Dit-Yan Yeung",
  title =        "Multilabel relationship learning",
  journal =      j-TKDD,
  volume =       "7",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2499907.2499910",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:06 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Multilabel learning problems are commonly found in
                 many applications. A characteristic shared by many
                 multilabel learning problems is that some labels have
                 significant correlations between them. In this article,
                 we propose a novel multilabel learning method, called
                 MultiLabel Relationship Learning (MLRL), which extends
                 the conventional support vector machine by explicitly
                 learning and utilizing the relationships between
                 labels. Specifically, we model the label relationships
                 using a label covariance matrix and use it to define a
                 new regularization term for the optimization problem.
                 MLRL learns the model parameters and the label
                 covariance matrix simultaneously based on a unified
                 convex formulation. To solve the convex optimization
                 problem, we use an alternating method in which each
                 subproblem can be solved efficiently. The relationship
                 between MLRL and two widely used maximum margin methods
                 for multilabel learning is investigated. Moreover, we
                 also propose a semisupervised extension of MLRL, called
                 SSMLRL, to demonstrate how to make use of unlabeled
                 data to help learn the label covariance matrix. Through
                 experiments conducted on some multilabel applications,
                 we find that MLRL not only gives higher classification
                 accuracy but also has better interpretability as
                 revealed by the label covariance matrix.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Peng:2013:EFF,
  author =       "Jing Peng and Guna Seetharaman and Wei Fan and Aparna
                 Varde",
  title =        "Exploiting {Fisher} and {Fukunaga--Koontz} transforms
                 in {Chernoff} dimensionality reduction",
  journal =      j-TKDD,
  volume =       "7",
  number =       "2",
  pages =        "8:1--8:??",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2499907.2499911",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:06 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Knowledge discovery from big data demands effective
                 representation of data. However, big data are often
                 characterized by high dimensionality, which makes
                 knowledge discovery more difficult. Many techniques for
                 dimensionality reduction have been proposed, including
                 well-known Fisher's Linear Discriminant Analysis (LDA).
                 However, the Fisher criterion is incapable of dealing
                 with heteroscedasticity in the data. A technique based
                 on the Chernoff criterion for linear dimensionality
                 reduction has been proposed that is capable of
                 exploiting heteroscedastic information in the data.
                 While the Chernoff criterion has been shown to
                 outperform the Fisher's, a clear understanding of its
                 exact behavior is lacking. In this article, we show
                 precisely what can be expected from the Chernoff
                 criterion. In particular, we show that the Chernoff
                 criterion exploits the Fisher and Fukunaga--Koontz
                 transforms in computing its linear discriminants.
                 Furthermore, we show that a recently proposed
                 decomposition of the data space into four subspaces is
                 incomplete. We provide arguments on how to best enrich
                 the decomposition of the data space in order to account
                 for heteroscedasticity in the data. Finally, we provide
                 experimental results validating our theoretical
                 analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Agarwal:2013:ISI,
  author =       "Deepak Agarwal and Rich Caruana and Jian Pei and Ke
                 Wang",
  title =        "Introduction to the {Special Issue ACM SIGKDD 2012}",
  journal =      j-TKDD,
  volume =       "7",
  number =       "3",
  pages =        "9:1--9:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2513092.2513093",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:07 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Rakthanmanon:2013:ABD,
  author =       "Thanawin Rakthanmanon and Bilson Campana and Abdullah
                 Mueen and Gustavo Batista and Brandon Westover and
                 Qiang Zhu and Jesin Zakaria and Eamonn Keogh",
  title =        "Addressing Big Data Time Series: Mining Trillions of
                 Time Series Subsequences Under Dynamic Time Warping",
  journal =      j-TKDD,
  volume =       "7",
  number =       "3",
  pages =        "10:1--10:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2500489",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:07 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Most time series data mining algorithms use similarity
                 search as a core subroutine, and thus the time taken
                 for similarity search is the bottleneck for virtually
                 all time series data mining algorithms, including
                 classification, clustering, motif discovery, anomaly
                 detection, and so on. The difficulty of scaling a
                 search to large datasets explains to a great extent why
                 most academic work on time series data mining has
                 plateaued at considering a few millions of time series
                 objects, while much of industry and science sits on
                 billions of time series objects waiting to be explored.
                 In this work we show that by using a combination of
                 four novel ideas we can search and mine massive time
                 series for the first time. We demonstrate the following
                 unintuitive fact: in large datasets we can exactly
                 search under Dynamic Time Warping (DTW) much more
                 quickly than the current state-of-the-art Euclidean
                 distance search algorithms. We demonstrate our work on
                 the largest set of time series experiments ever
                 attempted. In particular, the largest dataset we
                 consider is larger than the combined size of all of the
                 time series datasets considered in all data mining
                 papers ever published. We explain how our ideas allow
                 us to solve higher-level time series data mining
                 problems such as motif discovery and clustering at
                 scales that would otherwise be untenable. Moreover, we
                 show how our ideas allow us to efficiently support the
                 uniform scaling distance measure, a measure whose
                 utility seems to be underappreciated, but which we
                 demonstrate here. In addition to mining massive
                 datasets with up to one trillion datapoints, we will
                 show that our ideas also have implications for
                 real-time monitoring of data streams, allowing us to
                 handle much faster arrival rates and/or use cheaper and
                 lower powered devices than are currently possible.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Sun:2013:PIM,
  author =       "Yizhou Sun and Brandon Norick and Jiawei Han and
                 Xifeng Yan and Philip S. Yu and Xiao Yu",
  title =        "{PathSelClus}: Integrating Meta-Path Selection with
                 User-Guided Object Clustering in Heterogeneous
                 Information Networks",
  journal =      j-TKDD,
  volume =       "7",
  number =       "3",
  pages =        "11:1--11:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2500492",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:07 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Real-world, multiple-typed objects are often
                 interconnected, forming heterogeneous information
                 networks. A major challenge for link-based clustering
                 in such networks is their potential to generate many
                 different results, carrying rather diverse semantic
                 meanings. In order to generate desired clustering, we
                 propose to use meta-path, a path that connects object
                 types via a sequence of relations, to control
                 clustering with distinct semantics. Nevertheless, it is
                 easier for a user to provide a few examples (seeds)
                 than a weighted combination of sophisticated meta-paths
                 to specify her clustering preference. Thus, we propose
                 to integrate meta-path selection with user-guided
                 clustering to cluster objects in networks, where a user
                 first provides a small set of object seeds for each
                 cluster as guidance. Then the system learns the weight
                 for each meta-path that is consistent with the
                 clustering result implied by the guidance, and
                 generates clusters under the learned weights of
                 meta-paths. A probabilistic approach is proposed to
                 solve the problem, and an effective and efficient
                 iterative algorithm, PathSelClus, is proposed to learn
                 the model, where the clustering quality and the
                 meta-path weights mutually enhance each other. Our
                 experiments with several clustering tasks in two real
                 networks and one synthetic network demonstrate the
                 power of the algorithm in comparison with the
                 baselines.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bellare:2013:ASE,
  author =       "Kedar Bellare and Suresh Iyengar and Aditya
                 Parameswaran and Vibhor Rastogi",
  title =        "Active Sampling for Entity Matching with Guarantees",
  journal =      j-TKDD,
  volume =       "7",
  number =       "3",
  pages =        "12:1--12:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2500490",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:07 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In entity matching, a fundamental issue while training
                 a classifier to label pairs of entities as either
                 duplicates or nonduplicates is the one of selecting
                 informative training examples. Although active learning
                 presents an attractive solution to this problem,
                 previous approaches minimize the misclassification rate
                 (0--1 loss) of the classifier, which is an unsuitable
                 metric for entity matching due to class imbalance
                 (i.e., many more nonduplicate pairs than duplicate
                 pairs). To address this, a recent paper [Arasu et al.
                 2010] proposes to maximize recall of the classifier
                 under the constraint that its precision should be
                 greater than a specified threshold. However, the
                 proposed technique requires the labels of all $n$ input
                 pairs in the worst case. Our main result is an active
                 learning algorithm that approximately maximizes recall
                 of the classifier while respecting a precision
                 constraint with provably sublinear label complexity
                 (under certain distributional assumptions). Our
                 algorithm uses as a black box any active learning
                 module that minimizes 0--1 loss. We show that label
                 complexity of our algorithm is at most $ \log n $
                 times the label complexity of the black box, and also
                 bound the difference in the recall of classifier learnt
                 by our algorithm and the recall of the optimal
                 classifier satisfying the precision constraint. We
                 provide an empirical evaluation of our algorithm on
                 several real-world matching data sets that demonstrates
                 the effectiveness of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chattopadhyay:2013:BMA,
  author =       "Rita Chattopadhyay and Zheng Wang and Wei Fan and Ian
                 Davidson and Sethuraman Panchanathan and Jieping Ye",
  title =        "Batch Mode Active Sampling Based on Marginal
                 Probability Distribution Matching",
  journal =      j-TKDD,
  volume =       "7",
  number =       "3",
  pages =        "13:1--13:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2513092.2513094",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:07 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Active Learning is a machine learning and data mining
                 technique that selects the most informative samples for
                 labeling and uses them as training data; it is
                 especially useful when there are large amounts of
                 unlabeled data and labeling them is expensive.
                 Recently, batch-mode active learning, where a set of
                 samples are selected concurrently for labeling, based
                 on their collective merit, has attracted a lot of
                 attention. The objective of batch-mode active learning
                 is to select a set of informative samples so that a
                 classifier learned on these samples has good
                 generalization performance on the unlabeled data. Most
                 of the existing batch-mode active learning
                 methodologies try to achieve this by selecting samples
                 based on certain criteria. In this article we propose a
                 novel criterion which achieves good generalization
                 performance of a classifier by specifically selecting a
                 set of query samples that minimize the difference in
                 distribution between the labeled and the unlabeled
                 data, after annotation. We explicitly measure this
                 difference based on all candidate subsets of the
                 unlabeled data and select the best subset. The proposed
                 objective is an NP-hard integer programming
                 optimization problem. We provide two optimization
                 techniques to solve this problem. In the first one, the
                 problem is transformed into a convex quadratic
                 programming problem and in the second method the
                 problem is transformed into a linear programming
                 problem. Our empirical studies using publicly available
                 UCI datasets and two biomedical image databases
                 demonstrate the effectiveness of the proposed approach
                 in comparison with the state-of-the-art batch-mode
                 active learning methods. We also present two extensions
                 of the proposed approach, which incorporate uncertainty
                 of the predicted labels of the unlabeled data and
                 transfer learning in the proposed formulation. In
                 addition, we present a joint optimization framework for
                 performing both transfer and active learning
                 simultaneously unlike the existing approaches of
                 learning in two separate stages, that is, typically,
                 transfer learning followed by active learning. We
                 specifically minimize a common objective of reducing
                 distribution difference between the domain adapted
                 source, the queried and labeled samples and the rest of
                 the unlabeled target domain data. Our empirical studies
                 on two biomedical image databases and on a publicly
                 available 20 Newsgroups dataset show that incorporation
                 of uncertainty information and transfer learning
                 further improves the performance of the proposed active
                 learning based classifier. Our empirical studies also
                 show that the proposed transfer-active method based on
                 the joint optimization framework performs significantly
                 better than a framework which implements transfer and
                 active learning in two separate stages.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Briggs:2013:IAM,
  author =       "Forrest Briggs and Xiaoli Z. Fern and Raviv Raich and
                 Qi Lou",
  title =        "Instance Annotation for Multi-Instance Multi-Label
                 Learning",
  journal =      j-TKDD,
  volume =       "7",
  number =       "3",
  pages =        "14:1--14:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2500491",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:07 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Multi-instance multi-label learning (MIML) is a
                 framework for supervised classification where the
                 objects to be classified are bags of instances
                 associated with multiple labels. For example, an image
                 can be represented as a bag of segments and associated
                 with a list of objects it contains. Prior work on MIML
                 has focused on predicting label sets for previously
                 unseen bags. We instead consider the problem of
                 predicting instance labels while learning from data
                 labeled only at the bag level. We propose a regularized
                 rank-loss objective designed for instance annotation,
                 which can be instantiated with different aggregation
                 models connecting instance-level labels with bag-level
                 label sets. The aggregation models that we consider can
                 be factored as a linear function of a ``support
                 instance'' for each class, which is a single feature
                 vector representing a whole bag. Hence we name our
                 proposed methods rank-loss Support Instance Machines
                 (SIM). We propose two optimization methods for the
                 rank-loss objective, which is nonconvex. One is a
                 heuristic method that alternates between updating
                 support instances, and solving a convex problem in
                 which the support instances are treated as constant.
                 The other is to apply the constrained concave-convex
                 procedure (CCCP), which can also be interpreted as
                 iteratively updating support instances and solving a
                 convex problem. To solve the convex problem, we employ
                 the Pegasos framework of primal subgradient descent,
                 and prove that it finds an $ \epsilon $-suboptimal
                 solution in runtime that is linear in the number of
                 bags, instances, and $ 1 / \epsilon $. Additionally, we
                 suggest a method of extending the linear learning
                 algorithm to nonlinear classification, without
                 increasing the runtime asymptotically. Experiments on
                 artificial and real-world datasets including images and
                 audio show that the proposed methods achieve higher
                 accuracy than other loss functions used in prior work,
                 e.g., Hamming loss, and recent work in ambiguous label
                 classification.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Ji:2013:PFR,
  author          = {Ming Ji and Binbin Lin and Xiaofei He and Deng Cai and
    Jiawei Han},
  title           = {Parallel Field Ranking},
  journal         = j-TKDD,
  volume          = {7},
  number          = {3},
  pages           = {15:1--15:??},
  month           = sep,
  year            = {2013},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2513092.2513096},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Thu Mar 13 09:16:07 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Recently, ranking data with respect to the intrinsic
    geometric structure (manifold ranking) has received
    considerable attentions, with encouraging performance
    in many applications in pattern recognition,
    information retrieval and recommendation systems. Most
    of the existing manifold ranking methods focus on
    learning a ranking function that varies smoothly along
    the data manifold. However, beyond smoothness, a
    desirable ranking function should vary monotonically
    along the geodesics of the data manifold, such that the
    ranking order along the geodesics is preserved. In this
    article, we aim to learn a ranking function that varies
    linearly and therefore monotonically along the
    geodesics of the data manifold. Recent theoretical work
    shows that the gradient field of a linear function on
    the manifold has to be a parallel vector field.
    Therefore, we propose a novel ranking algorithm on the
    data manifolds, called Parallel Field Ranking.
    Specifically, we try to learn a ranking function and a
    vector field simultaneously. We require the vector
    field to be close to the gradient field of the ranking
    function, and the vector field to be as parallel as
    possible. Moreover, we require the value of the ranking
    function at the query point to be the highest, and then
    decrease linearly along the manifold. Experimental
    results on both synthetic data and real data
    demonstrate the effectiveness of our proposed
    algorithm.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {15},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Adali:2013:IPR,
  author =       "Sibel Adali and Malik Magdon-Ismail and Xiaohui Lu",
  title =        "{iHypR}: Prominence ranking in networks of
                 collaborations with hyperedges",
  journal =      j-TKDD,
  volume =       "7",
  number =       "4",
  pages =        "16:1--16:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2541268.2541269",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:09 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We present a new algorithm called iHypR for computing
                 prominence of actors in social networks of
                 collaborations. Our algorithm builds on the assumption
                 that prominent actors collaborate on prominent objects,
                 and prominent objects are naturally grouped into
                 prominent clusters or groups (hyperedges in a graph).
                 iHypR makes use of the relationships between actors,
                 objects, and hyperedges to compute a global prominence
                 score for the actors in the network. We do not assume
                 the hyperedges are given in advance. Hyperedges
                 computed by our method can perform as well or even
                 better than ``true'' hyperedges. Our algorithm is
                 customized for networks of collaborations, but it is
                 generally applicable without further tuning. We show,
                 through extensive experimentation with three real-life
                 data sets and multiple external measures of prominence,
                 that our algorithm outperforms existing well-known
                 algorithms. Our work is the first to offer such an
                 extensive evaluation. We show that unlike most existing
                 algorithms, the performance is robust across multiple
                 measures of performance. Further, we give a detailed
                 study of the sensitivity of our algorithm to different
                 data sets and the design choices within the algorithm
                 that a user may wish to change. Our article illustrates
                 the various trade-offs that must be considered in
                 computing prominence in collaborative social
                 networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Huang:2013:STP,
  author =       "Jin Huang and Feiping Nie and Heng Huang and Yi-Cheng
                 Tu and Yu Lei",
  title =        "Social trust prediction using heterogeneous networks",
  journal =      j-TKDD,
  volume =       "7",
  number =       "4",
  pages =        "17:1--17:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2541268.2541270",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:09 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Along with increasing popularity of social websites,
                 online users rely more on the trustworthiness
                 information to make decisions, extract and filter
                 information, and tag and build connections with other
                 users. However, such social network data often suffer
                 from severe data sparsity and are not able to provide
                 users with enough information. Therefore, trust
                 prediction has emerged as an important topic in social
                 network research. Traditional approaches are primarily
                 based on exploring trust graph topology itself.
                 However, research in sociology and our life experience
                 suggest that people who are in the same social circle
                 often exhibit similar behaviors and tastes. To take
                 advantage of the ancillary information for trust
                 prediction, the challenge then becomes what to transfer
                 and how to transfer. In this article, we address this
                 problem by aggregating heterogeneous social networks
                 and propose a novel joint social networks mining (JSNM)
                 method. Our new joint learning model explores the
                 user-group-level similarity between correlated graphs
                 and simultaneously learns the individual graph
                 structure; therefore, the shared structures and
                 patterns from multiple social networks can be utilized
                 to enhance the prediction tasks. As a result, we not
                 only improve the trust prediction in the target graph
                 but also facilitate other information retrieval tasks
                 in the auxiliary graphs. To optimize the proposed
                 objective function, we use the alternative technique to
                 break down the objective function into several
                 manageable subproblems. We further introduce the
                 auxiliary function to solve the optimization problems
                 with rigorously proved convergence. The extensive
                 experiments have been conducted on both synthetic and
                 real-world data. All empirical results demonstrate the
                 effectiveness of our method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Guzzo:2013:SIF,
  author =       "Antonella Guzzo and Luigi Moccia and Domenico
                 Sacc{\`a} and Edoardo Serra",
  title =        "Solving inverse frequent itemset mining with
                 infrequency constraints via large-scale linear
                 programs",
  journal =      j-TKDD,
  volume =       "7",
  number =       "4",
  pages =        "18:1--18:??",
  month =        nov,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2541268.2541271",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:09 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Inverse frequent set mining (IFM) is the problem of
                 computing a transaction database $D$ satisfying given
                 support constraints for some itemsets, which are
                 typically the frequent ones. This article proposes a
                 new formulation of IFM, called IFM$_I$ (IFM with
                 infrequency constraints), where the itemsets that are
                 not listed as frequent are constrained to be
                 infrequent; that is, they must have a support less than
                 or equal to a specified unique threshold. An instance
                 of IFM$_I$ can be seen as an instance of the original
                 IFM by making explicit the infrequency constraints for
                 the minimal infrequent itemsets, corresponding to the
                 so-called negative generator border defined in the
                 literature. The complexity increase from PSPACE
                 (complexity of IFM) to NEXP (complexity of IFM$_I$) is
                 caused by the cardinality of the negative generator
                 border, which can be exponential in the original input
                 size. Therefore, the article introduces a specific
                 problem parameter $ \kappa $ that computes an upper
                 bound to this cardinality using a hypergraph
                 interpretation for which minimal infrequent itemsets
                 correspond to minimal transversals. By fixing a
                 constant $k$, the article formulates a $k$-bounded
                 definition of the problem, called $k$-IFM$_I$, that
                 collects all instances for which the value of the
                 parameter $ \kappa $ is less than or equal to $k$---its
                 complexity is in PSPACE as for IFM. The bounded problem
                 is encoded as an integer linear program with a large
                 number of variables (actually exponential w.r.t. the
                 number of constraints), which is thereafter
                 approximated by relaxing integer constraints---the
                 decision problem of solving the linear program is
                 proven to be in NP. In order to solve the linear
                 program, a column generation technique is used that is
                 a variation of the simplex method designed to solve
                 large-scale linear programs, in particular with a huge
                 number of variables. The method at each step requires
                 the solution of an auxiliary integer linear program,
                 which is proven to be NP hard in this case and for
                 which a greedy heuristic is presented. The resulting
                 overall column generation solution algorithm enjoys
                 very good scaling as evidenced by the intensive
                 experimentation, thereby paving the way for its
                 application in real-life scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Balcazar:2013:FCP,
  author          = {Jos{\'e} L. Balc{\'a}zar},
  title           = {Formal and computational properties of the confidence
    boost of association rules},
  journal         = j-TKDD,
  volume          = {7},
  number          = {4},
  pages           = {19:1--19:??},
  month           = nov,
  year            = {2013},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2541268.2541272},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Thu Mar 13 09:16:09 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Some existing notions of redundancy among association
    rules allow for a logical-style characterization and
    lead to irredundant bases of absolutely minimum size.
    We push the intuition of redundancy further to find an
    intuitive notion of novelty of an association rule,
    with respect to other rules. Namely, an irredundant
    rule is so because its confidence is higher than what
    the rest of the rules would suggest; then, one can ask:
    how much higher? We propose to measure such a sort of
    novelty through the confidence boost of a rule. Acting
    as a complement to confidence and support, the
    confidence boost helps to obtain small and crisp sets
    of mined association rules and solves the well-known
    problem that, in certain cases, rules of negative
    correlation may pass the confidence bound. We analyze
    the properties of two versions of the notion of
    confidence boost, one of them a natural generalization
    of the other. We develop algorithms to filter rules
    according to their confidence boost, compare the
    concept to some similar notions in the literature, and
    describe the results of some experimentation employing
    the new notions on standard benchmark datasets. We
    describe an open source association mining tool that
    embodies one of our variants of confidence boost in
    such a way that the data mining process does not
    require the user to select any value for any
    parameter.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {19},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Ang:2013:CPN,
  author          = {Hock Hee Ang and Vivekanand Gopalkrishnan and Steven
    C. H. Hoi and Wee Keong Ng},
  title           = {Classification in {P2P} networks with cascade support
    vector machines},
  journal         = j-TKDD,
  volume          = {7},
  number          = {4},
  pages           = {20:1--20:??},
  month           = nov,
  year            = {2013},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2541268.2541273},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Thu Mar 13 09:16:09 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Classification in Peer-to-Peer (P2P) networks is
    important to many real applications, such as
    distributed intrusion detection, distributed
    recommendation systems, and distributed antispam
    detection. However, it is very challenging to perform
    classification in P2P networks due to many practical
    issues, such as scalability, peer dynamism, and
    asynchronism. This article investigates the practical
    techniques of constructing Support Vector Machine (SVM)
    classifiers in the P2P networks. In particular, we
    demonstrate how to efficiently cascade SVM in a P2P
    network with the use of reduced SVM. In addition, we
    propose to fuse the concept of cascade SVM with
    bootstrap aggregation to effectively balance the
    trade-off between classification accuracy, model
    construction, and prediction cost. We provide
    theoretical insights for the proposed solutions and
    conduct an extensive set of empirical studies on a
    number of large-scale datasets. Encouraging results
    validate the efficacy of the proposed approach.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {20},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Chen:2014:ISI,
  author          = {Wei Chen and Jie Tang},
  title           = {Introduction to special issue on computational aspects
    of social and information networks: Theory,
    methodologies, and applications {(TKDD-CASIN)}},
  journal         = j-TKDD,
  volume          = {8},
  number          = {1},
  pages           = {1:1--1:??},
  month           = feb,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2556608},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Thu Mar 13 09:16:11 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {1},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Yang:2014:USN,
  author          = {Zhi Yang and Christo Wilson and Xiao Wang and Tingting
    Gao and Ben Y. Zhao and Yafei Dai},
  title           = {Uncovering social network {Sybils} in the wild},
  journal         = j-TKDD,
  volume          = {8},
  number          = {1},
  pages           = {2:1--2:??},
  month           = feb,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2556609},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Thu Mar 13 09:16:11 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Sybil accounts are fake identities created to unfairly
    increase the power or resources of a single malicious
    user. Researchers have long known about the existence
    of Sybil accounts in online communities such as
    file-sharing systems, but they have not been able to
    perform large-scale measurements to detect them or
    measure their activities. In this article, we describe
    our efforts to detect, characterize, and understand
    Sybil account activity in the Renren Online Social
    Network (OSN). We use ground truth provided by Renren
    Inc. to build measurement-based Sybil detectors and
    deploy them on Renren to detect more than 100,000 Sybil
    accounts. Using our full dataset of 650,000 Sybils, we
    examine several aspects of Sybil behavior. First, we
    study their link creation behavior and find that
    contrary to prior conjecture, Sybils in OSNs do not
    form tight-knit communities. Next, we examine the
    fine-grained behaviors of Sybils on Renren using
    clickstream data. Third, we investigate
    behind-the-scenes collusion between large groups of
    Sybils. Our results reveal that Sybils with no explicit
    social ties still act in concert to launch attacks.
    Finally, we investigate enhanced techniques to identify
    stealthy Sybils. In summary, our study advances the
    understanding of Sybil behavior on OSNs and shows that
    Sybils can effectively avoid existing community-based
    Sybil detectors. We hope that our results will foster
    new research on Sybil detection that is based on novel
    types of Sybil features.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {2},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Jin:2014:SAR,
  author          = {Ruoming Jin and Victor E. Lee and Longjie Li},
  title           = {Scalable and axiomatic ranking of network role
    similarity},
  journal         = j-TKDD,
  volume          = {8},
  number          = {1},
  pages           = {3:1--3:??},
  month           = feb,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2518176},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Thu Mar 13 09:16:11 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {A key task in analyzing social networks and other
    complex networks is role analysis: describing and
    categorizing nodes according to how they interact with
    other nodes. Two nodes have the same role if they
    interact with equivalent sets of neighbors. The most
    fundamental role equivalence is automorphic
    equivalence. Unfortunately, the fastest algorithms
    known for graph automorphism are nonpolynomial.
    Moreover, since exact equivalence is rare, a more
    meaningful task is measuring the role similarity
    between any two nodes. This task is closely related to
    the structural or link-based similarity problem that
    SimRank addresses. However, SimRank and other existing
    similarity measures are not sufficient because they do
    not guarantee to recognize automorphically or
    structurally equivalent nodes. This article makes two
    contributions. First, we present and justify several
    axiomatic properties necessary for a role similarity
    measure or metric. Second, we present RoleSim, a new
    similarity metric that satisfies these axioms and can
    be computed with a simple iterative algorithm. We
    rigorously prove that RoleSim satisfies all of these
    axiomatic properties. We also introduce Iceberg
    RoleSim, a scalable algorithm that discovers all pairs
    with RoleSim scores above a user-defined threshold $
    \theta $. We demonstrate the interpretative power of
    RoleSim on both synthetic and real datasets.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {3},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Mcauley:2014:DSC,
  author =       "Julian McAuley and Jure Leskovec",
  title =        "Discovering social circles in ego networks",
  journal =      j-TKDD,
  volume =       "8",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2556612",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:11 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "People's personal social networks are big and
                 cluttered, and currently there is no good way to
                 automatically organize them. Social networking sites
                 allow users to manually categorize their friends into
                 social circles (e.g., ``circles'' on Google+, and
                 ``lists'' on Facebook and Twitter). However, circles
                 are laborious to construct and must be manually updated
                 whenever a user's network grows. In this article, we
                 study the novel task of automatically identifying
                 users' social circles. We pose this task as a
                 multimembership node clustering problem on a user's ego
                 network, a network of connections between her friends.
                 We develop a model for detecting circles that combines
                 network structure as well as user profile information.
                 For each circle, we learn its members and the
                 circle-specific user profile similarity metric.
                 Modeling node membership to multiple circles allows us
                 to detect overlapping as well as hierarchically nested
                 circles. Experiments show that our model accurately
                 identifies circles on a diverse set of data from
                 Facebook, Google+, and Twitter, for all of which we
                 obtain hand-labeled ground truth.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Abrahao:2014:SFA,
  author          = {Bruno Abrahao and Sucheta Soundarajan and John
    Hopcroft and Robert Kleinberg},
  title           = {A separability framework for analyzing community
    structure},
  journal         = j-TKDD,
  volume          = {8},
  number          = {1},
  pages           = {5:1--5:??},
  month           = feb,
  year            = {2014},
  coden           = {????},
  doi             = {https://doi.org/10.1145/2527231},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Thu Mar 13 09:16:11 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Four major factors govern the intricacies of community
    extraction in networks: (1) the literature offers a
    multitude of disparate community detection algorithms
    whose output exhibits high structural variability
    across the collection, (2) communities identified by
    algorithms may differ structurally from real
    communities that arise in practice, (3) there is no
    consensus characterizing how to discriminate
    communities from noncommunities, and (4) the
    application domain includes a wide variety of networks
    of fundamentally different natures. In this article, we
    present a class separability framework to tackle these
    challenges through a comprehensive analysis of
    community properties. Our approach enables the
    assessment of the structural dissimilarity among the
    output of multiple community detection algorithms and
    between the output of algorithms and communities that
    arise in practice. In addition, our method provides us
    with a way to organize the vast collection of community
    detection algorithms by grouping those that behave
    similarly. Finally, we identify the most discriminative
    graph-theoretical properties of community signature and
    the small subset of properties that account for most of
    the biases of the different community detection
    algorithms. We illustrate our approach with an
    experimental analysis, which reveals nuances of the
    structure of real and extracted communities. In our
    experiments, we furnish our framework with the output
    of 10 different community detection procedures,
    representative of categories of popular algorithms
    available in the literature, applied to a diverse
    collection of large-scale real network datasets whose
    domains span biology, online shopping, and social
    systems. We also analyze communities identified by
    annotations that accompany the data, which reflect
    exemplar communities in various domain. We characterize
    these communities using a broad spectrum of community
    properties to produce the different structural classes.
    As our experiments show that community structure is not
    a universal concept, our framework enables an informed
    choice of the most suitable community detection method
    for identifying communities of a specific type in a
    given network and allows for a comparison of existing
    community detection algorithms while guiding the design
    of new ones.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {5},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhong:2014:UBL,
  author =       "Erheng Zhong and Wei Fan and Qiang Yang",
  title =        "User behavior learning and transfer in composite
                 social networks",
  journal =      j-TKDD,
  volume =       "8",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2556613",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 13 09:16:11 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Accurate prediction of user behaviors is important for
                 many social media applications, including social
                 marketing, personalization, and recommendation. A major
                 challenge lies in that although many previous works
                 model user behavior from only historical behavior logs,
                 the available user behavior data or interactions
                 between users and items in a given social network are
                 usually very limited and sparse (e.g., $ \geq 99.9 \% $
                 empty), which makes models overfit the rare
                 observations and fail to provide accurate predictions.
                 We observe that many people are members of several
                 social networks in the same time, such as Facebook,
                 Twitter, and Tencent's QQ. Importantly, users'
                 behaviors and interests in different networks influence
                 one another. This provides an opportunity to leverage
                 the knowledge of user behaviors in different networks
                 by considering the overlapping users in different
                 networks as bridges, in order to alleviate the data
                 sparsity problem, and enhance the predictive
                 performance of user behavior modeling. Combining
                 different networks ``simply and naively'' does not work
                 well. In this article, we formulate the problem to
                 model multiple networks as ``adaptive composite
                 transfer'' and propose a framework called ComSoc.
                 ComSoc first selects the most suitable networks inside
                 a composite social network via a hierarchical Bayesian
                 model, parameterized for individual users. It then
                 builds topic models for user behavior prediction using
                 both the relationships in the selected networks and
                 related behavior data. With different relational
                 regularization, we introduce different implementations,
                 corresponding to different ways to transfer knowledge
                 from composite social relations. To handle big data, we
                 have implemented the algorithm using Map/Reduce. We
                 demonstrate that the proposed composite network-based
                 user behavior models significantly improve the
                 predictive accuracy over a number of existing
                 approaches on several real-world applications,
                 including a very large social networking dataset from
                 Tencent Inc.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ahmed:2014:NSS,
  author =       "Nesreen K. Ahmed and Jennifer Neville and Ramana
                 Kompella",
  title =        "Network Sampling: From Static to Streaming Graphs",
  journal =      j-TKDD,
  volume =       "8",
  number =       "2",
  pages =        "7:1--7:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601438",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Network sampling is integral to the analysis of
                 social, information, and biological networks. Since
                 many real-world networks are massive in size,
                 continuously evolving, and/or distributed in nature,
                 the network structure is often sampled in order to
                 facilitate study. For these reasons, a more thorough
                 and complete understanding of network sampling is
                 critical to support the field of network science. In
                 this paper, we outline a framework for the general
                 problem of network sampling by highlighting the
                 different objectives, population and units of interest,
                 and classes of network sampling methods. In addition,
                 we propose a spectrum of computational models for
                 network sampling methods, ranging from the
                 traditionally studied model based on the assumption of
                 a static domain to a more challenging model that is
                 appropriate for streaming domains. We design a family
                 of sampling methods based on the concept of graph
                 induction that generalize across the full spectrum of
                 computational models (from static to streaming) while
                 efficiently preserving many of the topological
                 properties of the input graphs. Furthermore, we
                 demonstrate how traditional static sampling algorithms
                 can be modified for graph streams for each of the three
                 main classes of sampling methods: node, edge, and
                 topology-based sampling. Experimental results indicate
                 that our proposed family of sampling methods more
                 accurately preserve the underlying properties of the
                 graph in both static and streaming domains. Finally, we
                 study the impact of network sampling algorithms on the
                 parameter estimation and performance evaluation of
                 relational classification algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ge:2014:RMA,
  author =       "Yong Ge and Guofei Jiang and Min Ding and Hui Xiong",
  title =        "Ranking Metric Anomaly in Invariant Networks",
  journal =      j-TKDD,
  volume =       "8",
  number =       "2",
  pages =        "8:1--8:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601436",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The management of large-scale distributed information
                 systems relies on the effective use and modeling of
                 monitoring data collected at various points in the
                 distributed information systems. A traditional approach
                 to model monitoring data is to discover invariant
                 relationships among the monitoring data. Indeed, we can
                 discover all invariant relationships among all pairs of
                 monitoring data and generate invariant networks, where
                 a node is a monitoring data source (metric) and a link
                 indicates an invariant relationship between two
                 monitoring data. Such an invariant network
                 representation can help system experts to localize and
                 diagnose the system faults by examining those broken
                 invariant relationships and their related metrics,
                 since system faults usually propagate among the
                 monitoring data and eventually lead to some broken
                 invariant relationships. However, at one time, there
                 are usually a lot of broken links (invariant
                 relationships) within an invariant network. Without
                 proper guidance, it is difficult for system experts to
                 manually inspect this large number of broken links. To
                 this end, in this article, we propose the problem of
                 ranking metrics according to the anomaly levels for a
                 given invariant network, while this is a nontrivial
                 task due to the uncertainties and the complex nature of
                 invariant networks. Specifically, we propose two types
                 of algorithms for ranking metric anomaly by link
                 analysis in invariant networks. Along this line, we
                 first define two measurements to quantify the anomaly
                 level of each metric, and introduce the mRank
                 algorithm. Also, we provide a weighted score mechanism
                 and develop the gRank algorithm, which involves an
                 iterative process to obtain a score to measure the
                 anomaly levels. In addition, some extended algorithms
                 based on mRank and gRank algorithms are developed by
                 taking into account the probability of being broken as
                 well as noisy links. Finally, we validate all the
                 proposed algorithms on a large number of real-world and
                 synthetic data sets to illustrate the effectiveness and
                 efficiency of different algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2014:DGP,
  author =       "Gensheng Zhang and Xiao Jiang and Ping Luo and Min
                 Wang and Chengkai Li",
  title =        "Discovering General Prominent Streaks in Sequence
                 Data",
  journal =      j-TKDD,
  volume =       "8",
  number =       "2",
  pages =        "9:1--9:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601439",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This article studies the problem of prominent streak
                 discovery in sequence data. Given a sequence of values,
                 a prominent streak is a long consecutive subsequence
                 consisting of only large (small) values, such as
                 consecutive games of outstanding performance in sports,
                 consecutive hours of heavy network traffic, and
                 consecutive days of frequent mentioning of a person in
                 social media. Prominent streak discovery provides
                 insightful data patterns for data analysis in many
                 real-world applications and is an enabling technique
                 for computational journalism. Given its real-world
                 usefulness and complexity, the research on prominent
                 streaks in sequence data opens a spectrum of
                 challenging problems. A baseline approach to finding
                 prominent streaks is a quadratic algorithm that
                 exhaustively enumerates all possible streaks and
                 performs pairwise streak dominance comparison. For more
                 efficient methods, we make the observation that
                 prominent streaks are in fact skyline points in two
                 dimensions---streak interval length and minimum value in
                 the interval. Our solution thus hinges on the idea to
                 separate the two steps in prominent streak discovery:
                 candidate streak generation and skyline operation over
                 candidate streaks. For candidate generation, we propose
                 the concept of local prominent streak (LPS). We prove
                 that prominent streaks are a subset of LPSs and the
                 number of LPSs is less than the length of a data
                 sequence, in comparison with the quadratic number of
                 candidates produced by the brute-force baseline method.
                 We develop efficient algorithms based on the concept of
                 LPS. The nonlinear local prominent streak (NLPS)-based
                 method considers a superset of LPSs as candidates, and
                 the linear local prominent streak (LLPS)-based method
                 further guarantees to consider only LPSs. The proposed
                 properties and algorithms are also extended for
                 discovering general top-$k$, multisequence, and
                 multidimensional prominent streaks. The results of
                 experiments using multiple real datasets verified the
                 effectiveness of the proposed methods and showed orders
                 of magnitude performance improvement against the
                 baseline method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Schifanella:2014:MTD,
  author =       "Claudio Schifanella and K. Sel{\c{c}}uk Candan and
                 Maria Luisa Sapino",
  title =        "Multiresolution Tensor Decompositions with Mode
                 Hierarchies",
  journal =      j-TKDD,
  volume =       "8",
  number =       "2",
  pages =        "10:1--10:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2532169",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:22 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Tensors (multidimensional arrays) are widely used for
                 representing high-order dimensional data, in
                 applications ranging from social networks, sensor data,
                 and Internet traffic. Multiway data analysis
                 techniques, in particular tensor decompositions, allow
                 extraction of hidden correlations among multiway data
                 and thus are key components of many data analysis
                 frameworks. Intuitively, these algorithms can be
                 thought of as multiway clustering schemes, which
                 consider multiple facets of the data in identifying
                 clusters, their weights, and contributions of each data
                 element. Unfortunately, algorithms for fitting multiway
                 models are, in general, iterative and very time
                 consuming. In this article, we observe that, in many
                 applications, there is a priori background knowledge
                 (or metadata) about one or more domain dimensions. This
                 metadata is often in the form of a hierarchy that
                 clusters the elements of a given data facet (or mode).
                 We investigate whether such single-mode data
                 hierarchies can be used to boost the efficiency of
                 tensor decomposition process, without significant
                 impact on the final decomposition quality. We consider
                 each domain hierarchy as a guide to help provide
                 higher- or lower-resolution views of the data in the
                 tensor on demand and we rely on these metadata-induced
                 multiresolution tensor representations to develop a
                 multiresolution approach to tensor decomposition. In
                 this article, we focus on an alternating least squares
                 (ALS)--based implementation of the two most important
                 decomposition models such as the PARAllel FACtors
                 (PARAFAC, which decomposes a tensor into a diagonal
                 tensor and a set of factor matrices) and the Tucker
                 (which produces as result a core tensor and a set of
                 dimension-subspaces matrices). Experiment results show
                 that, when the available metadata is used as a rough
                 guide, the proposed multiresolution method helps fit
                 both PARAFAC and Tucker models with consistent (under
                 different parameters settings) savings in execution
                 time and memory consumption, while preserving the
                 quality of the decomposition.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Huang:2014:RMN,
  author =       "Jin Huang and Feiping Nie and Heng Huang and Chris
                 Ding",
  title =        "Robust Manifold Nonnegative Matrix Factorization",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "11:1--11:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601434",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 3 13:50:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Nonnegative Matrix Factorization (NMF) has been one of
                 the most widely used clustering techniques for
                 exploratory data analysis. However, since each data
                 point enters the objective function with squared
                 residue error, a few outliers with large errors easily
                 dominate the objective function. In this article, we
                 propose a Robust Manifold Nonnegative Matrix
                 Factorization (RMNMF) method using $ \ell_{2, 1} $-norm and
                 integrating NMF and spectral clustering under the same
                 clustering framework. We also point out the solution
                 uniqueness issue for the existing NMF methods and
                 propose an additional orthonormal constraint to address
                 this problem. With the new constraint, the conventional
                 auxiliary function approach no longer works. We tackle
                 this difficult optimization problem via a novel
                 Augmented Lagrangian Method (ALM)--based algorithm and
                 convert the original constrained optimization problem
                 on one variable into a multivariate constrained
                 problem. The new objective function then can be
                 decomposed into several subproblems that each has a
                 closed-form solution. More importantly, we reveal the
                 connection of our method with robust $K$-means and
                 spectral clustering, and we demonstrate its theoretical
                 significance. Extensive experiments have been conducted
                 on nine benchmark datasets, and all empirical results
                 show the effectiveness of our method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2014:RAL,
  author =       "Yu Zhang and Dit-Yan Yeung",
  title =        "A Regularization Approach to Learning Task
                 Relationships in Multitask Learning",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "12:1--12:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2538028",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 3 13:50:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Multitask learning is a learning paradigm that seeks
                 to improve the generalization performance of a learning
                 task with the help of some other related tasks. In this
                 article, we propose a regularization approach to
                 learning the relationships between tasks in multitask
                 learning. This approach can be viewed as a novel
                 generalization of the regularized formulation for
                 single-task learning. Besides modeling positive task
                 correlation, our approach---multitask relationship
                 learning (MTRL)---can also describe negative task
                 correlation and identify outlier tasks based on the
                 same underlying principle. By utilizing a
                 matrix-variate normal distribution as a prior on the
                 model parameters of all tasks, our MTRL method has a
                 jointly convex objective function. For efficiency, we
                 use an alternating method to learn the optimal model
                 parameters for each task as well as the relationships
                 between tasks. We study MTRL in the symmetric multitask
                 learning setting and then generalize it to the
                 asymmetric setting as well. We also discuss some
                 variants of the regularization approach to demonstrate
                 the use of other matrix-variate priors for learning
                 task relationships. Moreover, to gain more insight into
                 our model, we also study the relationships between MTRL
                 and some existing multitask learning methods.
                 Experiments conducted on a toy problem as well as
                 several benchmark datasets demonstrate the
                 effectiveness of MTRL as well as its high
                 interpretability revealed by the task covariance
                 matrix.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Lin:2014:SCR,
  author =       "Ming Lin and Shifeng Weng and Changshui Zhang",
  title =        "On the Sample Complexity of Random {Fourier} Features
                 for Online Learning: How Many Random {Fourier} Features
                 Do We Need?",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "13:1--13:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2611378",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 3 13:50:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We study the sample complexity of random Fourier
                 features for online kernel learning---that is, the number
                 of random Fourier features required to achieve good
                 generalization performance. We show that when the loss
                 function is strongly convex and smooth, online kernel
                 learning with random Fourier features can achieve an $
                 O (\log T / T) $ bound for the excess risk with only $
                 O (1 / \lambda^2) $ random Fourier features, where $T$ is
                 the number of training examples and $ \lambda $ is the
                 modulus of strong convexity. This is a significant
                 improvement compared to the existing result for batch
                 kernel learning that requires $ O(T) $ random Fourier
                 features to achieve a generalization bound $ O(1 /
                 \sqrt T) $. Our empirical study verifies that online
                 kernel learning with a limited number of random Fourier
                 features can achieve similar generalization performance
                 as online learning using full kernel matrix. We also
                 present an enhanced online learning algorithm with
                 random Fourier features that improves the
                 classification performance by multiple passes of
                 training examples and a partial average.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Eyal:2014:PIM,
  author =       "Ron Eyal and Avi Rosenfeld and Sigal Sina and Sarit
                 Kraus",
  title =        "Predicting and Identifying Missing Node Information in
                 Social Networks",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "14:1--14:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2536775",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:23 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In recent years, social networks have surged in
                 popularity. One key aspect of social network research
                 is identifying important missing information that is
                 not explicitly represented in the network, or is not
                 visible to all. To date, this line of research
                 typically focused on finding the connections that are
                 missing between nodes, a challenge typically termed as
                 the link prediction problem. This article introduces
                 the missing node identification problem, where missing
                 members in the social network structure must be
                 identified. In this problem, indications of missing
                 nodes are assumed to exist. Given these indications and
                 a partial network, we must assess which indications
                 originate from the same missing node and determine the
                 full network structure. Toward solving this problem, we
                 present the missing node identification by spectral
                 clustering algorithm (MISC), an approach based on a
                 spectral clustering algorithm, combined with nodes'
                 pairwise affinity measures that were adopted from link
                 prediction research. We evaluate the performance of our
                 approach in different problem settings and scenarios,
                 using real-life data from Facebook. The results show
                 that our approach has beneficial results and can be
                 effective in solving the missing node identification
                 problem. In addition, this article also presents
                 R-MISC, which uses a sparse matrix representation,
                 efficient algorithms for calculating the nodes'
                 pairwise affinity, and a proprietary dimension
                 reduction technique to enable scaling the MISC
                 algorithm to large networks of more than 100,000 nodes.
                 Last, we consider problem settings where some of the
                 indications are unknown. Two algorithms are suggested
                 for this problem: speculative MISC, based on MISC, and
                 missing link completion, based on classical link
                 prediction literature. We show that speculative MISC
                 outperforms missing link completion.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Webb:2014:EDM,
  author =       "Geoffrey I. Webb and Jilles Vreeken",
  title =        "Efficient Discovery of the Most Interesting
                 Associations",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "15:1--15:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601433",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Jun 26 05:48:23 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Self-sufficient itemsets have been proposed as an
                 effective approach to summarizing the key associations
                 in data. However, their computation appears highly
                 demanding, as assessing whether an itemset is
                 self-sufficient requires consideration of all pairwise
                 partitions of the itemset into pairs of subsets as well
                 as consideration of all supersets. This article
                 presents the first published algorithm for efficiently
                 discovering self-sufficient itemsets. This
                 branch-and-bound algorithm deploys two powerful pruning
                 mechanisms based on upper bounds on itemset value and
                 statistical significance level. It demonstrates that
                 finding top-$k$ productive and nonredundant itemsets,
                 with postprocessing to identify those that are not
                 independently productive, can efficiently identify
                 small sets of key associations. We present extensive
                 evaluation of the strengths and limitations of the
                 technique, including comparisons with alternative
                 approaches to finding the most interesting
                 associations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Shabtai:2014:ODM,
  author =       "Asaf Shabtai and Maya Bercovitch and Lior Rokach and
                 Yuval Elovici",
  title =        "Optimizing Data Misuse Detection",
  journal =      j-TKDD,
  volume =       "8",
  number =       "3",
  pages =        "16:1--16:??",
  month =        jun,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2611520",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 3 13:50:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Data misuse may be performed by entities such as an
                 organization's employees and business partners who are
                 granted access to sensitive information and misuse
                 their privileges. We assume that users can be either
                 trusted or untrusted. The access of untrusted parties
                 to data objects (e.g., client and patient records)
                 should be monitored in an attempt to detect misuse.
                 However, monitoring data objects is resource intensive
                 and time-consuming and may also cause disturbance or
                 inconvenience to the involved employees. Therefore, the
                 monitored data objects should be carefully selected. In
                 this article, we present two optimization problems
                 carefully designed for selecting specific data objects
                 for monitoring, such that the detection rate is
                 maximized and the monitoring effort is minimized. In
                 the first optimization problem, the goal is to select
                 data objects for monitoring that are accessed by at
                 most $c$ trusted agents while ensuring access to at
                 least $k$ monitored objects by each untrusted agent
                 (both $c$ and $k$ are integer variable). As opposed to
                 the first optimization problem, the goal of the second
                 optimization problem is to select monitored data
                 objects that maximize the number of monitored data
                 objects accessed by untrusted agents while ensuring
                 that each trusted agent does not access more than $d$
                 monitored data objects ($d$ is an integer variable as
                 well). Two efficient heuristic algorithms for solving
                 these optimization problems are proposed, and
                 experiments were conducted simulating different
                 scenarios to evaluate the algorithms' performance.
                 Moreover, we compared the heuristic algorithms'
                 performance to the optimal solution and conducted
                 sensitivity analysis on the three parameters ($c$,
                 $k$, and $d$) and on the ratio between the trusted and
                 untrusted agents.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Hernandez-Orallo:2014:PRC,
  author          = {Jos{\'e} Hern{\'a}ndez-Orallo},
  title           = {Probabilistic Reframing for Cost-Sensitive Regression},
  journal         = j-TKDD,
  volume          = {8},
  number          = {4},
  pages           = {17:1--17:??},
  month           = aug,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2641758},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Aug 26 17:49:02 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Common-day applications of predictive models usually
                     involve the full use of the available contextual
                     information. When the operating context changes, one
                     may fine-tune the by-default (incontextual) prediction
                     or may even abstain from predicting a value (a reject).
                     Global reframing solutions, where the same function is
                     applied to adapt the estimated outputs to a new cost
                     context, are possible solutions here. An alternative
                     approach, which has not been studied in a comprehensive
                     way for regression in the knowledge discovery and data
                     mining literature, is the use of a local (e.g.,
                     probabilistic) reframing approach, where decisions are
                     made according to the estimated output and a
                     reliability, confidence, or probability estimation. In
                     this article, we advocate for a simple two-parameter
                     (mean and variance) approach, working with a normal
                     conditional probability density. Given the conditional
                     mean produced by any regression technique, we develop
                     lightweight ``enrichment'' methods that produce good
                     estimates of the conditional variance, which are used
                     by the probabilistic (local) reframing methods. We
                     apply these methods to some very common families of
                     cost-sensitive problems, such as optimal predictions in
                     (auction) bids, asymmetric loss scenarios, and
                     rejection rules.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {17},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Miettinen:2014:MMD,
  author =       "Pauli Miettinen and Jilles Vreeken",
  title =        "{MDL4BMF}: Minimum Description Length for {Boolean}
                 Matrix Factorization",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "18:1--18:??",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2601437",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Oct 7 18:45:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Matrix factorizations---where a given data matrix is
                 approximated by a product of two or more factor
                 matrices---are powerful data mining tools. Among other
                 tasks, matrix factorizations are often used to separate
                 global structure from noise. This, however, requires
                 solving the ``model order selection problem'' of
                 determining the proper rank of the factorization, that
                 is, to answer where fine-grained structure stops, and
                 where noise starts. Boolean Matrix Factorization
                 (BMF)---where data, factors, and matrix product are
                 Boolean---has in recent years received increased
                 attention from the data mining community. The technique
                 has desirable properties, such as high interpretability
                 and natural sparsity. Yet, so far no method for
                 selecting the correct model order for BMF has been
                 available. In this article, we propose the use of the
                 Minimum Description Length (MDL) principle for this
                 task. Besides solving the problem, this well-founded
                 approach has numerous benefits; for example, it is
                 automatic, does not require a likelihood function, is
                 fast, and, as experiments show, is highly accurate. We
                 formulate the description length function for BMF in
                 general---making it applicable for any BMF algorithm.
                 We discuss how to construct an appropriate encoding:
                 starting from a simple and intuitive approach, we
                 arrive at a highly efficient data-to-model--based
                 encoding for BMF. We extend an existing algorithm for
                 BMF to use MDL to identify the best Boolean matrix
                 factorization, analyze the complexity of the problem,
                 and perform an extensive experimental evaluation to
                 study its behavior.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Tang:2014:FSS,
  author          = {Jiliang Tang and Huan Liu},
  title           = {Feature Selection for Social Media Data},
  journal         = j-TKDD,
  volume          = {8},
  number          = {4},
  pages           = {19:1--19:??},
  month           = oct,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2629587},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Oct 7 18:45:26 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Feature selection is widely used in preparing
                     high-dimensional data for effective data mining. The
                     explosive popularity of social media produces massive
                     and high-dimensional data at an unprecedented rate,
                     presenting new challenges to feature selection. Social
                     media data consists of (1) traditional
                     high-dimensional, attribute-value data such as posts,
                     tweets, comments, and images, and (2) linked data that
                     provides social context for posts and describes the
                     relationships between social media users as well as who
                     generates the posts, and so on. The nature of social
                     media also determines that its data is massive, noisy,
                     and incomplete, which exacerbates the already
                     challenging problem of feature selection. In this
                     article, we study a novel feature selection problem of
                     selecting features for social media data with its
                     social context. In detail, we illustrate the
                     differences between attribute-value data and social
                     media data, investigate if linked data can be exploited
                     in a new feature selection framework by taking
                     advantage of social science theories. We design and
                     conduct experiments on datasets from real-world social
                     media Web sites, and the empirical results demonstrate
                     that the proposed framework can significantly improve
                     the performance of feature selection. Further
                     experiments are conducted to evaluate the effects of
                     user--user and user--post relationships manifested in
                     linked data on feature selection, and research issues
                     for future work will be discussed.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {19},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Riondato:2014:EDA,
  author =       "Matteo Riondato and Eli Upfal",
  title =        "Efficient Discovery of Association Rules and Frequent
                 Itemsets through Sampling with Tight Performance
                 Guarantees",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "20:1--20:??",
  month =        oct,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629586",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Oct 7 18:45:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The tasks of extracting (top-$K$) Frequent Itemsets
                 (FIs) and Association Rules (ARs) are fundamental
                 primitives in data mining and database applications.
                 Exact algorithms for these problems exist and are
                 widely used, but their running time is hindered by the
                 need of scanning the entire dataset, possibly multiple
                 times. High-quality approximations of FIs and ARs are
                 sufficient for most practical uses. Sampling techniques
                 can be used for fast discovery of approximate
                 solutions, but works exploring this technique did not
                 provide satisfactory performance guarantees on the
                 quality of the approximation due to the difficulty of
                 bounding the probability of under- or oversampling any
                 one of an unknown number of frequent itemsets. We
                 circumvent this issue by applying the statistical
                 concept of Vapnik--Chervonenkis (VC) dimension to
                 develop a novel technique for providing tight bounds on
                 the sample size that guarantees approximation of the
                 (top-$K$) FIs and ARs within user-specified parameters.
                 The resulting sample size is linearly dependent on the
                 VC-dimension of a range space associated with the
                 dataset. We analyze the VC-dimension of this range
                 space and show that it is upper bounded by an
                 easy-to-compute characteristic quantity of the dataset,
                 the $d$-index, namely, the maximum integer $d$ such
                 that the dataset contains at least $d$ transactions of
                 length at least $d$ such that no one of them is a
                 superset of or equal to another. We show that this
                 bound is tight for a large class of datasets. The
                 resulting sample size is a significant improvement over
                 previous known results. We present an extensive
                 experimental evaluation of our technique on real and
                 artificial datasets, demonstrating the practicality of
                 our methods, and showing that they achieve even higher
                 quality approximations than what is guaranteed by the
                 analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Burton:2014:DSC,
  author          = {Scott H. Burton and Christophe G. Giraud-Carrier},
  title           = {Discovering Social Circles in Directed Graphs},
  journal         = j-TKDD,
  volume          = {8},
  number          = {4},
  pages           = {21:1--21:??},
  month           = aug,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2641759},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Aug 26 17:49:02 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We examine the problem of identifying social circles,
                     or sets of cohesive and mutually aware nodes
                     surrounding an initial query set, in directed graphs
                     where the complete graph is not known beforehand. This
                     problem differs from local community mining, in that
                     the query set defines the circle of interest. We
                     explicitly handle edge direction, as in many cases
                     relationships are not symmetric, and focus on the local
                     context because many real-world graphs cannot be
                     feasibly known. We outline several issues that are
                     unique to this context, introduce a quality function to
                     measure the value of including a particular node in an
                     emerging social circle, and describe a greedy social
                     circle discovery algorithm. We demonstrate the
                     effectiveness of this approach on artificial
                     benchmarks, large networks with topical community
                     labels, and several real-world case studies.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {21},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Paul:2014:RPL,
  author          = {Saurabh Paul and Christos Boutsidis and Malik
                     Magdon-Ismail and Petros Drineas},
  title           = {Random Projections for Linear Support Vector Machines},
  journal         = j-TKDD,
  volume          = {8},
  number          = {4},
  pages           = {22:1--22:??},
  month           = oct,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2641760},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Oct 7 18:45:26 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Let $X$ be a data matrix of rank $ \rho $, whose rows
                     represent $n$ points in $d$-dimensional space. The
                     linear support vector machine constructs a hyperplane
                     separator that maximizes the 1-norm soft margin. We
                     develop a new oblivious dimension reduction technique
                     that is precomputed and can be applied to any input
                     matrix $X$. We prove that, with high probability, the
                     margin and minimum enclosing ball in the feature space
                     are preserved to within $ \epsilon $-relative error,
                     ensuring comparable generalization as in the original
                     space in the case of classification. For regression, we
                     show that the margin is preserved to $ \epsilon
                     $-relative error with high probability. We present
                     extensive experiments with real and synthetic data to
                     support our theory.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {22},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Erdo:2014:RGN,
  author =       "D{\'o}ra Erd{\H{o}}s and Rainer Gemulla and Evimaria
                 Terzi",
  title =        "Reconstructing Graphs from Neighborhood Data",
  journal =      j-TKDD,
  volume =       "8",
  number =       "4",
  pages =        "23:1--23:??",
  month =        aug,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2641761",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Aug 26 17:49:02 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Consider a social network and suppose that we are only
                 given the number of common friends between each pair of
                 users. Can we reconstruct the underlying network?
                 Similarly, consider a set of documents and the words
                 that appear in them. If we only know the number of
                 common words for every pair of documents, as well as
                 the number of common documents for every pair of words,
                 can we infer which words appear in which documents? In
                 this article, we develop a general methodology for
                 answering questions like these. We formalize these
                 questions in what we call the {\em Reconstruct}
                 problem: given information about the common neighbors
                 of nodes in a network, our goal is to reconstruct the
                 hidden binary matrix that indicates the presence or
                 absence of relationships between individual nodes. In
                 fact, we propose two different variants of this
                 problem: one where the number of connections of every
                 node (i.e., the degree of every node) is known and a
                 second one where it is unknown. We call these variants
                 the degree-aware and the degree-oblivious versions of
                 the Reconstruct problem, respectively. Our algorithms
                 for both variants exploit the properties of the
                 singular value decomposition of the hidden binary
                 matrix. More specifically, we show that using the
                 available neighborhood information, we can reconstruct
                 the hidden matrix by finding the components of its
                 singular value decomposition and then combining them
                 appropriately. Our extensive experimental study
                 suggests that our methods are able to reconstruct
                 binary matrices of different characteristics with up to
                 100\% accuracy.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "23",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Acharya:2014:OFC,
  author          = {Ayan Acharya and Eduardo R. Hruschka and Joydeep Ghosh
                     and Sreangsu Acharyya},
  title           = {An Optimization Framework for Combining Ensembles of
                     Classifiers and Clusterers with Applications to
                     Nontransductive Semisupervised Learning and Transfer
                     Learning},
  journal         = j-TKDD,
  volume          = {9},
  number          = {1},
  pages           = {1:1--1:??},
  month           = aug,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2601435},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Aug 26 17:49:05 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Unsupervised models can provide supplementary soft
                     constraints to help classify new ``target'' data
                     because similar instances in the target set are more
                     likely to share the same class label. Such models can
                     also help detect possible differences between training
                     and target distributions, which is useful in
                     applications where concept drift may take place, as in
                     transfer learning settings. This article describes a
                     general optimization framework that takes as input
                     class membership estimates from existing classifiers
                     learned on previously encountered ``source'' (or
                     training) data, as well as a similarity matrix from a
                     cluster ensemble operating solely on the target (or
                     test) data to be classified, and yields a consensus
                     labeling of the target data. More precisely, the
                     application settings considered are nontransductive
                     semisupervised and transfer learning scenarios where
                     the training data are used only to build an ensemble of
                     classifiers and are subsequently discarded before
                     classifying the target data. The framework admits a
                     wide range of loss functions and
                     classification/clustering methods. It exploits
                     properties of Bregman divergences in conjunction with
                     Legendre duality to yield a principled and scalable
                     approach. A variety of experiments show that the
                     proposed framework can yield results substantially
                     superior to those provided by na{\"\i}vely applying
                     classifiers learned on the original task to the target
                     data. In addition, we show that the proposed approach,
                     even not being conceptually transductive, can provide
                     better results compared to some popular transductive
                     learning techniques.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {1},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Boedihardjo:2014:FEL,
  author          = {Arnold P. Boedihardjo and Chang-Tien Lu and Bingsheng
                     Wang},
  title           = {A Framework for Exploiting Local Information to
                     Enhance Density Estimation of Data Streams},
  journal         = j-TKDD,
  volume          = {9},
  number          = {1},
  pages           = {2:1--2:??},
  month           = aug,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2629618},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Aug 26 17:49:05 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {The Probability Density Function (PDF) is the
                     fundamental data model for a variety of stream mining
                     algorithms. Existing works apply the standard
                     nonparametric Kernel Density Estimator (KDE) to
                     approximate the PDF of data streams. As a result, the
                     stream-based KDEs cannot accurately capture complex
                     local density features. In this article, we propose the
                     use of Local Region (LRs) to model local density
                     information in univariate data streams. In-depth
                     theoretical analyses are presented to justify the
                     effectiveness of the LR-based KDE. Based on the
                     analyses, we develop the General Local rEgion AlgorithM
                     (GLEAM) to enhance the estimation quality of
                     structurally complex univariate distributions for
                     existing stream-based KDEs. A set of algorithmic
                     optimizations is designed to improve the query
                     throughput of GLEAM and to achieve its linear order
                     computation. Additionally, a comprehensive suite of
                     experiments was conducted to test the effectiveness and
                     efficiency of GLEAM.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {2},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Ordonez:2014:BVS,
  author          = {Carlos Ordonez and Carlos Garcia-Alvarado and
                     Veerabhadaran Baladandayuthapani},
  title           = {{Bayesian} Variable Selection in Linear Regression in
                     One Pass for Large Datasets},
  journal         = j-TKDD,
  volume          = {9},
  number          = {1},
  pages           = {3:1--3:??},
  month           = aug,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2629617},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Aug 26 17:49:05 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Bayesian models are generally computed with Markov
                     Chain Monte Carlo (MCMC) methods. The main disadvantage
                     of MCMC methods is the large number of iterations they
                     need to sample the posterior distributions of model
                     parameters, especially for large datasets. On the other
                     hand, variable selection remains a challenging problem
                     due to its combinatorial search space, where Bayesian
                     models are a promising solution. In this work, we study
                     how to accelerate Bayesian model computation for
                     variable selection in linear regression. We propose a
                     fast Gibbs sampler algorithm, a widely used MCMC method
                     that incorporates several optimizations. We use a
                     Zellner prior for the regression coefficients, an
                     improper prior on variance, and a conjugate prior
                     Gaussian distribution, which enable dataset
                     summarization in one pass, thus exploiting an augmented
                     set of sufficient statistics. Thereafter, the algorithm
                     iterates in main memory. Sufficient statistics are
                     indexed with a sparse binary vector to efficiently
                     compute matrix projections based on selected variables.
                     Discovered variable subsets probabilities, selecting
                     and discarding each variable, are stored on a hash
                     table for fast retrieval in future iterations. We study
                     how to integrate our algorithm into a Database
                     Management System (DBMS), exploiting aggregate
                     User-Defined Functions for parallel data summarization
                     and stored procedures to manipulate matrices with
                     arrays. An experimental evaluation with real datasets
                     evaluates accuracy and time performance, comparing our
                     DBMS-based algorithm with the R package. Our algorithm
                     is shown to produce accurate results, scale linearly on
                     dataset size, and run orders of magnitude faster than
                     the R package.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {3},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@article{Fei:2014:SSB,
  author = {Hongliang Fei and Jun Huan},
  title = {Structured Sparse Boosting for Graph Classification},
  journal = j-TKDD,
  volume = {9},
  number = {1},
  pages = {4:1--4:??},
  month = aug,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2629328},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Aug 26 17:49:05 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Boosting is a highly effective algorithm that produces
    a linear combination of weak classifiers (a.k.a. base
    learners) to obtain high-quality classification models.
    In this article, we propose a generalized logit boost
    algorithm in which base learners have structural
    relationships in the functional space. Although such
    relationships are generic, our work is particularly
    motivated by the emerging topic of pattern-based
    classification for semistructured data including
    graphs. Toward an efficient incorporation of the
    structure information, we have designed a general model
    in which we use an undirected graph to capture the
    relationship of subgraph-based base learners. In our
    method, we employ both L$_1$ and Laplacian-based L$_2$
    regularization to logit boosting to achieve model
    sparsity and smoothness in the functional space spanned
    by the base learners. We have derived efficient
    optimization algorithms based on coordinate descent for
    the new boosting formulation and theoretically prove
    that it exhibits a natural grouping effect for nearby
    spatial or overlapping base learners and that the
    resulting estimator is consistent. Additionally,
    motivated by the connection between logit boosting and
    logistic regression, we extend our structured sparse
    regularization framework to logistic regression for
    vectorial data in which features are structured. Using
    comprehensive experimental study and comparing our work
    with the state-of-the-art, we have demonstrated the
    effectiveness of the proposed learning method.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {4},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@article{Xu:2014:GGB,
  author = {Zhiqiang Xu and Yiping Ke and Yi Wang and Hong Cheng
    and James Cheng},
  title = {{GBAGC}: a General {Bayesian} Framework for Attributed
    Graph Clustering},
  journal = j-TKDD,
  volume = {9},
  number = {1},
  pages = {5:1--5:??},
  month = aug,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2629616},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Aug 26 17:49:05 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Graph clustering, also known as community detection,
    is a long-standing problem in data mining. In recent
    years, with the proliferation of rich attribute
    information available for objects in real-world graphs,
    how to leverage not only structural but also attribute
    information for clustering attributed graphs becomes a
    new challenge. Most existing works took a
    distance-based approach. They proposed various distance
    measures to fuse structural and attribute information
    and then applied standard techniques for graph
    clustering based on these distance measures. In this
    article, we take an alternative view and propose a
    novel Bayesian framework for attributed graph
    clustering. Our framework provides a general and
    principled solution to modeling both the structural and
    the attribute aspects of a graph. It avoids the
    artificial design of a distance measure in existing
    methods and, furthermore, can seamlessly handle graphs
    with different types of edges and vertex attributes. We
    develop an efficient variational method for graph
    clustering under this framework and derive two concrete
    algorithms for clustering unweighted and weighted
    attributed graphs. Experimental results on large
    real-world datasets show that our algorithms
    significantly outperform the state-of-the-art
    distance-based method, in terms of both effectiveness
    and efficiency.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {5},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@article{Coscia:2014:UHO,
  author = {Michele Coscia and Giulio Rossetti and Fosca Giannotti
    and Dino Pedreschi},
  title = {Uncovering Hierarchical and Overlapping Communities
    with a Local-First Approach},
  journal = j-TKDD,
  volume = {9},
  number = {1},
  pages = {6:1--6:??},
  month = aug,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2629511},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Aug 26 17:49:05 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Community discovery in complex networks is the task of
    organizing a network's structure by grouping together
    nodes related to each other. Traditional approaches are
    based on the assumption that there is a global-level
    organization in the network. However, in many
    scenarios, each node is the bearer of complex
    information and cannot be classified in disjoint
    clusters. The top-down global view of the partition
    approach is not designed for this. Here, we represent
    this complex information as multiple latent labels, and
    we postulate that edges in the networks are created
    among nodes carrying similar labels. The latent labels
    are the communities a node belongs to and we discover
    them with a simple local-first approach to community
    discovery. This is achieved by democratically letting
    each node vote for the communities it sees surrounding
    it in its limited view of the global system, its ego
    neighborhood, using a label propagation algorithm,
    assuming that each node is aware of the label it shares
    with each of its connections. The local communities are
    merged hierarchically, unveiling the modular
    organization of the network at the global level and
    identifying overlapping groups and groups of groups. We
    tested this intuition against the state-of-the-art
    overlapping community discovery and found that our new
    method advances in the chosen scenarios in the quality
    of the obtained communities. We perform a test on
    benchmark and on real-world networks, evaluating the
    quality of the community coverage by using the
    extracted communities to predict the metadata attached
    to the nodes, which we consider external information
    about the latent labels. We also provide an explanation
    about why real-world networks contain overlapping
    communities and how our logic is able to capture them.
    Finally, we show how our method is deterministic, is
    incremental, and has a limited time complexity, so that
    it can be used on real-world scale networks.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {6},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@article{Wang:2014:GML,
  author = {Guangtao Wang and Qinbao Song and Xueying Zhang and
    Kaiyuan Zhang},
  title = {A Generic Multilabel Learning-Based Classification
    Algorithm Recommendation Method},
  journal = j-TKDD,
  volume = {9},
  number = {1},
  pages = {7:1--7:??},
  month = oct,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2629474},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Fri Oct 10 17:19:10 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {As more and more classification algorithms continue to
    be developed, recommending appropriate algorithms to a
    given classification problem is increasingly important.
    This article first distinguishes the algorithm
    recommendation methods by two dimensions: (1)
    meta-features, which are a set of measures used to
    characterize the learning problems, and (2)
    meta-target, which represents the relative performance
    of the classification algorithms on the learning
    problem. In contrast to the existing algorithm
    recommendation methods whose meta-target is usually in
    the form of either the ranking of candidate algorithms
    or a single algorithm, this article proposes a new and
    natural multilabel form to describe the meta-target.
    This is due to the fact that there would be multiple
    algorithms being appropriate for a given problem in
    practice. Furthermore, a novel multilabel
    learning-based generic algorithm recommendation method
    is proposed, which views the algorithm recommendation
    as a multilabel learning problem and solves the problem
    by the mature multilabel learning algorithms. To
    evaluate the proposed multilabel learning-based
    recommendation method, extensive experiments with 13
    well-known classification algorithms, two kinds of
    meta-targets such as algorithm ranking and single
    algorithm, and five different kinds of meta-features
    are conducted on 1,090 benchmark learning problems. The
    results show the effectiveness of our proposed
    multilabel learning-based recommendation method.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {7},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2014:EEM,
  author =       "Pinghui Wang and John C. S. Lui and Bruno Ribeiro and
                 Don Towsley and Junzhou Zhao and Xiaohong Guan",
  title =        "Efficiently Estimating Motif Statistics of Large
                 Networks",
  journal =      j-TKDD,
  volume =       "9",
  number =       "2",
  pages =        "8:1--8:??",
  month =        sep,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2629564",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Oct 7 18:49:26 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Exploring statistics of locally connected subgraph
                 patterns (also known as network motifs) has helped
                 researchers better understand the structure and
                 function of biological and Online Social Networks
                 (OSNs). Nowadays, the massive size of some critical
                 networks---often stored in already overloaded relational
                 databases---effectively limits the rate at which nodes
                 and edges can be explored, making it a challenge to
                 accurately discover subgraph statistics. In this work,
                 we propose sampling methods to accurately estimate
                 subgraph statistics from as few queried nodes as
                 possible. We present sampling algorithms that
                 efficiently and accurately estimate subgraph properties
                 of massive networks. Our algorithms require no
                 precomputation or complete network topology
                 information. At the same time, we provide theoretical
                 guarantees of convergence. We perform experiments using
                 widely known datasets and show that, for the same
                 accuracy, our algorithms require an order of magnitude
                 less queries (samples) than the current
                 state-of-the-art algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Zheng:2014:FHE,
  author = {Li Zheng and Tao Li and Chris Ding},
  title = {A Framework for Hierarchical Ensemble Clustering},
  journal = j-TKDD,
  volume = {9},
  number = {2},
  pages = {9:1--9:??},
  month = sep,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2611380},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Oct 7 18:49:26 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Ensemble clustering, as an important extension of the
    clustering problem, refers to the problem of combining
    different (input) clusterings of a given dataset to
    generate a final (consensus) clustering that is a
    better fit in some sense than existing clusterings.
    Over the past few years, many ensemble clustering
    approaches have been developed. However, most of them
    are designed for partitional clustering methods, and
    few research efforts have been reported for ensemble
    hierarchical clustering methods. In this article, a
    hierarchical ensemble clustering framework that can
    naturally combine both partitional clustering and
    hierarchical clustering results is proposed. In
    addition, a novel method for learning the ultra-metric
    distance from the aggregated distance matrices and
    generating final hierarchical clustering with enhanced
    cluster separation is developed based on the
    ultra-metric distance for hierarchical clustering. We
    study three important problems: dendrogram description,
    dendrogram combination, and dendrogram selection. We
    develop two approaches for dendrogram selection based
    on tree distances, and we investigate various
    dendrogram distances for representing dendrograms. We
    provide a systematic empirical study of the ensemble
    hierarchical clustering problem. Experimental results
    demonstrate the effectiveness of our proposed
    approaches.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {9},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@article{Huai:2014:TPC,
  author = {Baoxing Huai and Enhong Chen and Hengshu Zhu and Hui
    Xiong and Tengfei Bao and Qi Liu and Jilei Tian},
  title = {Toward Personalized Context Recognition for Mobile
    Users: a Semisupervised {Bayesian} {HMM} Approach},
  journal = j-TKDD,
  volume = {9},
  number = {2},
  pages = {10:1--10:??},
  month = sep,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2629504},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Oct 7 18:49:26 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {The problem of mobile context recognition targets the
    identification of semantic meaning of context in a
    mobile environment. This plays an important role in
    understanding mobile user behaviors and thus provides
    the opportunity for the development of better
    intelligent context-aware services. A key step of
    context recognition is to model the personalized
    contextual information of mobile users. Although many
    studies have been devoted to mobile context modeling,
    limited efforts have been made on the exploitation of
    the sequential and dependency characteristics of mobile
    contextual information. Also, the latent semantics
    behind mobile context are often ambiguous and poorly
    understood. Indeed, a promising direction is to
    incorporate some domain knowledge of common contexts,
    such as ``waiting for a bus'' or ``having dinner,'' by
    modeling both labeled and unlabeled context data from
    mobile users because there are often few labeled
    contexts available in practice. To this end, in this
    article, we propose a sequence-based semisupervised
    approach to modeling personalized context for mobile
    users. Specifically, we first exploit the Bayesian
    Hidden Markov Model (B-HMM) for modeling context in the
    form of probabilistic distributions and transitions of
    raw context data. Also, we propose a sequential model
    by extending B-HMM with the prior knowledge of
    contextual features to model context more accurately.
    Then, to efficiently learn the parameters and initial
    values of the proposed models, we develop a novel
    approach for parameter estimation by integrating the
    Dirichlet Process Mixture (DPM) model and the Mixture
    Unigram (MU) model. Furthermore, by incorporating both
    user-labeled and unlabeled data, we propose a
    semisupervised learning-based algorithm to identify and
    model the latent semantics of context. Finally,
    experimental results on real-world data clearly
    validate both the efficiency and effectiveness of the
    proposed approaches for recognizing personalized
    context of mobile users.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {10},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@article{Liu:2014:ADI,
  author = {Siyuan Liu and Lei Chen and Lionel M. Ni},
  title = {Anomaly Detection from Incomplete Data},
  journal = j-TKDD,
  volume = {9},
  number = {2},
  pages = {11:1--11:??},
  month = sep,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2629668},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Oct 7 18:49:26 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Anomaly detection (a.k.a., outlier or burst detection)
    is a well-motivated problem and a major data mining and
    knowledge discovery task. In this article, we study the
    problem of population anomaly detection, one of the key
    issues related to event monitoring and population
    management within a city. Through studying detected
    population anomalies, we can trace and analyze these
    anomalies, which could help to model city traffic
    design and event impact analysis and prediction.
    Although a significant and interesting issue, it is
    very hard to detect population anomalies and retrieve
    anomaly trajectories, especially given that it is
    difficult to get actual and sufficient population data.
    To address the difficulties of a lack of real
    population data, we take advantage of mobile phone
    networks, which offer enormous spatial and temporal
    communication data on persons. More importantly, we
    claim that we can utilize these mobile phone data to
    infer and approximate population data. Thus, we can
    study the population anomaly detection problem by
    taking advantages of unique features hidden in mobile
    phone data. In this article, we present a system to
    conduct Population Anomaly Detection (PAD). First, we
    propose an effective clustering method,
    correlation-based clustering, to cluster the incomplete
    location information from mobile phone data (i.e., from
    mobile call volume distribution to population density
    distribution). Then, we design an adaptive
    parameter-free detection method, R-scan, to capture the
    distributed dynamic anomalies. Finally, we devise an
    efficient algorithm, BT-miner, to retrieve anomaly
    trajectories. The experimental results from real-life
    mobile phone data confirm the effectiveness and
    efficiency of the proposed algorithms. Finally, the
    proposed methods are realized as a pilot system in a
    city in China.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {11},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@article{Gundecha:2014:UVR,
  author = {Pritam Gundecha and Geoffrey Barbier and Jiliang Tang
    and Huan Liu},
  title = {User Vulnerability and Its Reduction on a Social
    Networking Site},
  journal = j-TKDD,
  volume = {9},
  number = {2},
  pages = {12:1--12:??},
  month = sep,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2630421},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Oct 7 18:49:26 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Privacy and security are major concerns for many users
    of social media. When users share information (e.g.,
    data and photos) with friends, they can make their
    friends vulnerable to security and privacy breaches
    with dire consequences. With the continuous expansion
    of a user's social network, privacy settings alone are
    often inadequate to protect a user's profile. In this
    research, we aim to address some critical issues
    related to privacy protection: (1) How can we measure
    and assess individual users' vulnerability? (2) With
    the diversity of one's social network friends, how can
    one figure out an effective approach to maintaining
    balance between vulnerability and social utility? In
    this work, first we present a novel way to define
    vulnerable friends from an individual user's
    perspective. User vulnerability is dependent on whether
    or not the user's friends' privacy settings protect the
    friend and the individual's network of friends (which
    includes the user). We show that it is feasible to
    measure and assess user vulnerability and reduce one's
    vulnerability without changing the structure of a
    social networking site. The approach is to unfriend
    one's most vulnerable friends. However, when such a
    vulnerable friend is also socially important,
    unfriending him or her would significantly reduce one's
    own social status. We formulate this novel problem as
    vulnerability minimization with social utility
    constraints. We formally define the optimization
    problem and provide an approximation algorithm with a
    proven bound. Finally, we conduct a large-scale
    evaluation of a new framework using a Facebook dataset.
    We resort to experiments and observe how much
    vulnerability an individual user can be decreased by
    unfriending a vulnerable friend. We compare performance
    of different unfriending strategies and discuss the
    security risk of new friend requests. Additionally, by
    employing different forms of social utility, we confirm
    that the balance between user vulnerability and social
    utility can be practically achieved.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {12},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@article{Duan:2014:SRC,
  author = {Lian Duan and W. Nick Street and Yanchi Liu and
    Songhua Xu and Brook Wu},
  title = {Selecting the Right Correlation Measure for Binary
    Data},
  journal = j-TKDD,
  volume = {9},
  number = {2},
  pages = {13:1--13:??},
  month = sep,
  year = {2014},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/2637484},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Oct 7 18:49:26 MDT 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Finding the most interesting correlations among items
    is essential for problems in many commercial, medical,
    and scientific domains. Although there are numerous
    measures available for evaluating correlations,
    different correlation measures provide drastically
    different results. Piatetsky-Shapiro provided three
    mandatory properties for any reasonable correlation
    measure, and Tan et al. proposed several properties to
    categorize correlation measures; however, it is still
    hard for users to choose the desirable correlation
    measures according to their needs. In order to solve
    this problem, we explore the effectiveness problem in
    three ways. First, we propose two desirable properties
    and two optional properties for correlation measure
    selection and study the property satisfaction for
    different correlation measures. Second, we study
    different techniques to adjust correlation measures and
    propose two new correlation measures: the Simplified $
    \chi^2 $ with Continuity Correction and the Simplified
    $ \chi^2 $ with Support. Third, we analyze the upper
    and lower bounds of different measures and categorize
    them by the bound differences. Combining these three
    directions, we provide guidelines for users to choose
    the proper measure according to their needs.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {13},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@Article{Huang:2014:PBA,
  author          = {Hao Huang and Hong Qin and Shinjae Yoo and Dantong
                     Yu},
  title           = {Physics-Based Anomaly Detection Defined on Manifold
                     Space},
  journal         = j-TKDD,
  volume          = {9},
  number          = {2},
  pages           = {14:1--14:??},
  month           = sep,
  year            = {2014},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2641574},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Oct 7 18:49:26 MDT 2014},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Current popular anomaly detection algorithms are
                     capable of detecting global anomalies but often fail to
                     distinguish local anomalies from normal instances.
                     Inspired by contemporary physics theory (i.e., heat
                     diffusion and quantum mechanics), we propose two
                     unsupervised anomaly detection algorithms. Building on
                     the embedding manifold derived from heat diffusion, we
                     devise Local Anomaly Descriptor (LAD), which faithfully
                     reveals the intrinsic neighborhood density. It uses a
                     scale-dependent umbrella operator to bridge global and
                     local properties, which makes LAD more informative
                     within an adaptive scope of neighborhood. To offer more
                     stability of local density measurement on scaling
                     parameter tuning, we formulate Fermi Density Descriptor
                     (FDD), which measures the probability of a fermion
                     particle being at a specific location. By choosing the
                     stable energy distribution function, FDD steadily
                     distinguishes anomalies from normal instances with any
                     scaling parameter setting. To further enhance the
                     efficacy of our proposed algorithms, we explore the
                     utility of anisotropic Gaussian kernel (AGK), which
                     offers better manifold-aware affinity information. We
                     also quantify and examine the effect of different
                     Laplacian normalizations for anomaly detection.
                     Comprehensive experiments on both synthetic and
                     benchmark datasets verify that our proposed algorithms
                     outperform the existing anomaly detection algorithms.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {14},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% Guest-editorial introduction; its article number is "15e", distinct
%%% from article 15 (Jha:2015:SES).  The pages prefix repeats the
%%% article number so that the two entries do not share a page range.
@Article{Gionis:2015:ISI,
  author =       "Aristides Gionis and Hang Li",
  title =        "Introduction to the Special Issue {ACM SIGKDD} 2013",
  journal =      j-TKDD,
  volume =       "9",
  number =       "3",
  pages =        "15e:1--15e:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700993",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Apr 14 09:22:28 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15e",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jha:2015:SES,
  author          = {Madhav Jha and C. Seshadhri and Ali Pinar},
  title           = {A Space-Efficient Streaming Algorithm for Estimating
                     Transitivity and Triangle Counts Using the Birthday
                     Paradox},
  journal         = j-TKDD,
  volume          = {9},
  number          = {3},
  pages           = {15:1--15:??},
  month           = feb,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700395},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Mar 6 09:34:37 MST 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We design a space-efficient algorithm that
                     approximates the transitivity (global clustering
                     coefficient) and total triangle count with only a
                     single pass through a graph given as a stream of edges.
                     Our procedure is based on the classic probabilistic
                     result, the birthday paradox. When the transitivity is
                     constant and there are more edges than wedges (common
                     properties for social networks), we can prove that our
                     algorithm requires $ O(\sqrt n) $ space ($n$ is the
                     number of vertices) to provide accurate estimates. We
                     run a detailed set of experiments on a variety of real
                     graphs and demonstrate that the memory requirement of
                     the algorithm is a tiny fraction of the graph. For
                     example, even for a graph with 200 million edges, our
                     algorithm stores just 40,000 edges to give accurate
                     results. Being a single pass streaming algorithm, our
                     procedure also maintains a real-time estimate of the
                     transitivity/number of triangles of a graph by storing
                     a minuscule fraction of edges.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {15},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Tang:2015:FMT,
  author =       "Lu-An Tang and Xiao Yu and Quanquan Gu and Jiawei Han
                 and Guofei Jiang and Alice Leung and Thomas {La
                 Porta}",
  title =        "A Framework of Mining Trajectories from Untrustworthy
                 Data in Cyber-Physical System",
  journal =      j-TKDD,
  volume =       "9",
  number =       "3",
  pages =        "16:1--16:??",
  month =        feb,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700394",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 6 09:34:37 MST 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "A cyber-physical system (CPS) integrates physical
                 (i.e., sensor) devices with cyber (i.e., informational)
                 components to form a context-sensitive system that
                 responds intelligently to dynamic changes in real-world
                 situations. The CPS has wide applications in scenarios
                 such as environment monitoring, battlefield
                 surveillance, and traffic control. One key research
                 problem of CPS is called mining lines in the sand. With
                 a large number of sensors (sand) deployed in a
                 designated area, the CPS is required to discover all
                 trajectories (lines) of passing intruders in real time.
                 There are two crucial challenges that need to be
                 addressed: (1) the collected sensor data are not
                 trustworthy, and (2) the intruders do not send out any
                 identification information. The system needs to
                 distinguish multiple intruders and track their
                 movements. This study proposes a method called LiSM
                 (Line-in-the-Sand Miner) to discover trajectories from
                 untrustworthy sensor data. LiSM constructs a watching
                 network from sensor data and computes the locations of
                 intruder appearances based on the link information of
                 the network. The system retrieves a cone model from the
                 historical trajectories to track multiple intruders.
                 Finally, the system validates the mining results and
                 updates sensors' reliability scores in a feedback
                 process. In addition, LoRM (Line-on-the-Road Miner) is
                 proposed for trajectory discovery on road
                 networks---mining lines on the roads. LoRM employs a
                 filtering-and-refinement framework to reduce the
                 distance computational overhead on road networks and
                 uses a shortest-path-measure to track intruders. The
                 proposed methods are evaluated with extensive
                 experiments on big datasets. The experimental results
                 show that the proposed methods achieve higher accuracy
                 and efficiency in trajectory mining tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2015:QDR,
  author          = {Zheng Wang and Jieping Ye},
  title           = {Querying Discriminative and Representative Samples for
                     Batch Mode Active Learning},
  journal         = j-TKDD,
  volume          = {9},
  number          = {3},
  pages           = {17:1--17:??},
  month           = feb,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700408},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Fri Mar 6 09:34:37 MST 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Empirical risk minimization (ERM) provides a
                     principled guideline for many machine learning and data
                     mining algorithms. Under the ERM principle, one
                     minimizes an upper bound of the true risk, which is
                     approximated by the summation of empirical risk and the
                     complexity of the candidate classifier class. To
                     guarantee a satisfactory learning performance, ERM
                     requires that the training data are i.i.d. sampled from
                     the unknown source distribution. However, this may not
                     be the case in active learning, where one selects the
                     most informative samples to label, and these data may
                     not follow the source distribution. In this article, we
                     generalize the ERM principle to the active learning
                     setting. We derive a novel form of upper bound for the
                     true risk in the active learning setting; by minimizing
                     this upper bound, we develop a practical batch mode
                     active learning method. The proposed formulation
                     involves a nonconvex integer programming optimization
                     problem. We solve it efficiently by an alternating
                     optimization method. Our method is shown to query the
                     most informative samples while preserving the source
                     distribution as much as possible, thus identifying the
                     most uncertain and representative queries. We further
                     extend our method to multiclass active learning by
                     introducing novel pseudolabels in the multiclass case
                     and developing an efficient algorithm. Experiments on
                     benchmark datasets and real-world applications
                     demonstrate the superior performance of our proposed
                     method compared to state-of-the-art methods.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {17},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Gopal:2015:HBI,
  author          = {Siddharth Gopal and Yiming Yang},
  title           = {Hierarchical {Bayesian} Inference and Recursive
                     Regularization for Large-Scale Classification},
  journal         = j-TKDD,
  volume          = {9},
  number          = {3},
  pages           = {18:1--18:??},
  month           = apr,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2629585},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Apr 14 09:22:28 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {In this article, we address open challenges in
                     large-scale classification, focusing on how to
                     effectively leverage the dependency structures
                     (hierarchical or graphical) among class labels, and how
                     to make the inference scalable in jointly optimizing
                     all model parameters. We propose two main approaches,
                     namely the hierarchical Bayesian inference framework
                     and the recursive regularization scheme. The key idea
                     in both approaches is to reinforce the similarity among
                     parameter across the nodes in a hierarchy or network
                     based on the proximity and connectivity of the nodes.
                     For scalability, we develop hierarchical variational
                     inference algorithms and fast dual coordinate descent
                     training procedures with parallelization. In our
                     experiments for classification problems with hundreds
                     of thousands of classes and millions of training
                     instances with terabytes of parameters, the proposed
                     methods show consistent and statistically significant
                     improvements over other competing approaches, and the
                     best results on multiple benchmark datasets for
                     large-scale classification.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {18},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Yin:2015:MLB,
  author          = {Hongzhi Yin and Bin Cui and Ling Chen and Zhiting Hu
                     and Chengqi Zhang},
  title           = {Modeling Location-Based User Rating Profiles for
                     Personalized Recommendation},
  journal         = j-TKDD,
  volume          = {9},
  number          = {3},
  pages           = {19:1--19:??},
  month           = apr,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2663356},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Apr 14 09:22:28 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {This article proposes LA-LDA, a location-aware
                     probabilistic generative model that exploits
                     location-based ratings to model user profiles and
                     produce recommendations. Most of the existing
                     recommendation models do not consider the spatial
                     information of users or items; however, LA-LDA supports
                     three classes of location-based ratings, namely spatial
                     user ratings for nonspatial items, nonspatial user
                     ratings for spatial items, and spatial user ratings for
                     spatial items. LA-LDA consists of two components,
                     ULA-LDA and ILA-LDA, which are designed to take into
                     account user and item location information,
                     respectively. The component ULA-LDA explicitly
                     incorporates and quantifies the influence from local
                     public preferences to produce recommendations by
                     considering user home locations, whereas the component
                     ILA-LDA recommends items that are closer in both taste
                     and travel distance to the querying users by capturing
                     item co-occurrence patterns, as well as item location
                     co-occurrence patterns. The two components of LA-LDA
                     can be applied either separately or collectively,
                     depending on the available types of location-based
                     ratings. To demonstrate the applicability and
                     flexibility of the LA-LDA model, we deploy it to both
                     top-$k$ recommendation and cold start recommendation
                     scenarios. Experimental evidence on large-scale
                     real-world data, including the data from Gowalla (a
                     location-based social network), DoubanEvent (an
                     event-based social network), and MovieLens (a movie
                     recommendation system), reveal that LA-LDA models user
                     profiles more accurately by outperforming existing
                     recommendation models for top-$k$ recommendation and
                     the cold start problem.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {19},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Hu:2015:PSD,
  author          = {Juhua Hu and De-Chuan Zhan and Xintao Wu and Yuan
                     Jiang and Zhi-Hua Zhou},
  title           = {Pairwised Specific Distance Learning from Physical
                     Linkages},
  journal         = j-TKDD,
  volume          = {9},
  number          = {3},
  pages           = {20:1--20:??},
  month           = apr,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700405},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Apr 14 09:22:28 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {In real tasks, usually a good classification
                     performance can only be obtained when a good distance
                     metric is obtained; therefore, distance metric learning
                     has attracted significant attention in the past few
                     years. Typical studies of distance metric learning
                     evaluate how to construct an appropriate distance
                     metric that is able to separate training data points
                     from different classes or satisfy a set of constraints
                     (e.g., must-links and/or cannot-links). It is
                     noteworthy that this task becomes challenging when
                     there are only limited labeled training data points and
                     no constraints are given explicitly. Moreover, most
                     existing approaches aim to construct a global distance
                     metric that is applicable to all data points. However,
                     different data points may have different properties and
                     may require different distance metrics. We notice that
                     data points in real tasks are often connected by
                     physical links (e.g., people are linked with each other
                     in social networks; personal webpages are often
                     connected to other webpages, including nonpersonal
                     webpages), but the linkage information has not been
                     exploited in distance metric learning. In this article,
                     we develop a pairwised specific distance (PSD) approach
                     that exploits the structures of physical linkages and
                     in particular captures the key observations that
                     nonmetric and clique linkages imply the appearance of
                     different or unique semantics, respectively. It is
                     noteworthy that, rather than generating a global
                     distance, PSD generates different distances for
                     different pairs of data points; this property is
                     desired in applications involving complicated data
                     semantics. We mainly present PSD for multi-class
                     learning and further extend it to multi-label learning.
                     Experimental results validate the effectiveness of PSD,
                     especially in the scenarios in which there are very
                     limited labeled training data points and no explicit
                     constraints are given.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {20},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Soundarajan:2015:ULG,
  author          = {Sucheta Soundarajan and John E. Hopcroft},
  title           = {Use of Local Group Information to Identify Communities
                     in Networks},
  journal         = j-TKDD,
  volume          = {9},
  number          = {3},
  pages           = {21:1--21:??},
  month           = apr,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700404},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Apr 14 09:22:28 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {The recent interest in networks has inspired a broad
                     range of work on algorithms and techniques to
                     characterize, identify, and extract communities from
                     networks. Such efforts are complicated by a lack of
                     consensus on what a ``community'' truly is, and these
                     disagreements have led to a wide variety of
                     mathematical formulations for describing communities.
                     Often, these mathematical formulations, such as
                     modularity and conductance, have been founded in the
                     general principle that communities, like a $ G(n, p) $
                     graph, are ``round,'' with connections throughout the
                     entire community, and so algorithms were developed to
                     optimize such mathematical measures. More recently, a
                     variety of algorithms have been developed that, rather
                     than expecting connectivity through the entire
                     community, seek out very small groups of well-connected
                     nodes and then connect these groups into larger
                     communities. In this article, we examine seven real
                     networks, each containing external annotation that
                     allows us to identify ``annotated communities.'' A
                     study of these annotated communities gives insight into
                     why the second category of community detection
                     algorithms may be more successful than the first
                     category. We then present a flexible algorithm template
                     that is based on the idea of joining together small
                     sets of nodes. In this template, we first identify very
                     small, tightly connected ``subcommunities'' of nodes,
                     each corresponding to a single node's ``perception'' of
                     the network around it. We then create a new network in
                     which each node represents such a subcommunity, and
                     then identify communities in this new network. Because
                     each node can appear in multiple subcommunities, this
                     method allows us to detect overlapping communities.
                     When evaluated on real data, we show that our template
                     outperforms many other state-of-the-art algorithms.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {21},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2015:UCN,
  author =       "Pinghui Wang and Junzhou Zhao and John C. S. Lui and
                 Don Towsley and Xiaohong Guan",
  title =        "Unbiased Characterization of Node Pairs over Large
                 Graphs",
  journal =      j-TKDD,
  volume =       "9",
  number =       "3",
  pages =        "22:1--22:??",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700393",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Apr 14 09:22:28 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Characterizing user pair relationships is important
                 for applications such as friend recommendation and
                 interest targeting in online social networks (OSNs).
                 Due to the large-scale nature of such networks, it is
                 infeasible to enumerate all user pairs and thus
                 sampling is used. In this article, we show that it is a
                 great challenge for OSN service providers to
                 characterize user pair relationships, even when they
                 possess the complete graph topology. The reason is that
                 when sampling techniques (i.e., uniform vertex sampling
                 (UVS) and random walk (RW)) are naively applied, they
                 can introduce large biases, particularly for estimating
                 similarity distribution of user pairs with constraints
                 like existence of mutual neighbors, which is important
                 for applications such as identifying network homophily.
                 Estimating statistics of user pairs is more challenging
                 in the absence of the complete topology information, as
                 an unbiased sampling technique like UVS is usually not
                 allowed and exploring the OSN graph topology is
                 expensive. To address these challenges, we present
                 unbiased sampling methods to characterize user pair
                 properties based on UVS and RW techniques. We carry out
                 an evaluation of our methods to show their accuracy and
                 efficiency. Finally, we apply our methods to three
                 OSNs---Foursquare, Douban, and Xiami---and discover that
                 significant homophily is present in these networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "22",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Vlachos:2015:DPC,
  author          = {Michail Vlachos and Johannes Schneider and Vassilios
                     G. Vassiliadis},
  title           = {On Data Publishing with Clustering Preservation},
  journal         = j-TKDD,
  volume          = {9},
  number          = {3},
  pages           = {23:1--23:??},
  month           = apr,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700403},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Apr 14 09:22:28 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {The emergence of cloud-based storage services is
                     opening up new avenues in data exchange and data
                     dissemination. This has amplified the interest in
                     right-protection mechanisms to establish ownership in
                     the event of data leakage. Current right-protection
                     technologies, however, rarely provide strong guarantees
                     on dataset utility after the protection process. This
                     work presents techniques that explicitly address this
                     topic and provably preserve the outcome of certain
                     mining operations. In particular, we take special care
                     to guarantee that the outcome of hierarchical
                     clustering operations remains the same before and after
                     right protection. Our approach considers all prevalent
                     hierarchical clustering variants: single-, complete-,
                     and average-linkage. We imprint the ownership in a
                     dataset using watermarking principles, and we derive
                     tight bounds on the expansion/contraction of distances
                     incurred by the process. We leverage our analysis to
                     design fast algorithms for right protection without
                     exhaustively searching the vast design space. Finally,
                     because the right-protection process introduces a
                     user-tunable distortion on the dataset, we explore the
                     possibility of using this mechanism for data
                     obfuscation. We quantify the tradeoff between
                     obfuscation and utility for spatiotemporal datasets and
                     discover very favorable characteristics of the process.
                     An additional advantage is that when one is interested
                     in both right-protecting and obfuscating the original
                     data values, the proposed mechanism can accomplish both
                     tasks simultaneously.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {23},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{VazDeMelo:2015:UDP,
  author          = {Pedro O. S. {Vaz De Melo} and Christos Faloutsos and
                     Renato Assun{\c{c}}{\~a}o and Rodrigo Alves and Antonio
                     A. F. Loureiro},
  title           = {Universal and Distinct Properties of Communication
                     Dynamics: How to Generate Realistic Inter-event Times},
  journal         = j-TKDD,
  volume          = {9},
  number          = {3},
  pages           = {24:1--24:??},
  month           = apr,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700399},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Apr 14 09:22:28 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {With the advancement of information systems, means of
                     communications are becoming cheaper, faster, and more
                     available. Today, millions of people carrying
                     smartphones or tablets are able to communicate
                     practically any time and anywhere they want. They can
                     access their e-mails, comment on weblogs, watch and
                     post videos and photos (as well as comment on them),
                     and make phone calls or text messages almost
                     ubiquitously. Given this scenario, in this article, we
                     tackle a fundamental aspect of this new era of
                     communication: How the time intervals between
                     communication events behave for different technologies
                     and means of communications. Are there universal
                     patterns for the Inter-Event Time Distribution (IED)?
                     How do inter-event times behave differently among
                     particular technologies? To answer these questions, we
                     analyzed eight different datasets from real and modern
                     communication data and found four well-defined patterns
                     seen in all the eight datasets. Moreover, we propose
                     the use of the Self-Feeding Process (SFP) to generate
                     inter-event times between communications. The SFP is an
                     extremely parsimonious point process that requires at
                     most two parameters and is able to generate inter-event
                     times with all the universal properties we observed in
                     the data. We also show three potential applications of
                     the SFP: as a framework to generate a synthetic dataset
                     containing realistic communication events of any one of
                     the analyzed means of communications, as a technique to
                     detect anomalies, and as a building block for more
                     specific models that aim to encompass the
                     particularities seen in each of the analyzed systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {24},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhang:2015:WIY,
  author          = {Jing Zhang and Jie Tang and Juanzi Li and Yang Liu and
                     Chunxiao Xing},
  title           = {Who Influenced You? {Predicting} Retweet via Social
                     Influence Locality},
  journal         = j-TKDD,
  volume          = {9},
  number          = {3},
  pages           = {25:1--25:??},
  month           = apr,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700398},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Tue Apr 14 09:22:28 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Social influence occurs when one's opinions, emotions,
                     or behaviors are affected by others in a social
                     network. However, social influence takes many forms,
                     and its underlying mechanism is still unclear. For
                     example, how is one's behavior influenced by a group of
                     friends who know each other and by the friends from
                     different ego friend circles? In this article, we study
                     the social influence problem in a large microblogging
                     network. Particularly, we consider users' (re)tweet
                     behaviors and focus on investigating how friends in
                     one's ego network influence retweet behaviors. We
                     propose a novel notion of social influence locality and
                     develop two instantiation functions based on pairwise
                     influence and structural diversity. The defined
                     influence locality functions have strong predictive
                     power. Without any additional features, we can obtain
                     an F1-score of 71.65\% for predicting users' retweet
                     behaviors by training a logistic regression classifier
                     based on the defined influence locality functions. We
                     incorporate social influence locality into a factor
                     graph model, which can further leverage the
                     network-based correlation. Our experiments on the large
                     microblogging network show that the model significantly
                     improves the precision of retweet prediction. Our
                     analysis also reveals several intriguing discoveries.
                     For example, if you have six friends retweeting a
                     microblog, the average likelihood that you will also
                     retweet it strongly depends on the structure among the
                     six friends: The likelihood will significantly drop
                     (only 1/6) when the six friends do not know each other,
                     compared with the case when the six friends know each
                     other.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {25},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Xie:2015:MMA,
  author          = {Hong Xie and John C. S. Lui},
  title           = {Mathematical Modeling and Analysis of Product Rating
                     with Partial Information},
  journal         = j-TKDD,
  volume          = {9},
  number          = {4},
  pages           = {26:1--26:??},
  month           = jun,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700386},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed Jun 3 06:21:22 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Many Web services like Amazon, Epinions, and
                     TripAdvisor provide historical product ratings so that
                     users can evaluate the quality of products. Product
                     ratings are important because they affect how well a
                     product will be adopted by the market. The challenge is
                     that we only have partial information on these ratings:
                     each user assigns ratings to only a small subset of
                     products. Under this partial information setting, we
                     explore a number of fundamental questions. What is the
                     minimum number of ratings a product needs so that one
                     can make a reliable evaluation of its quality? How may
                     users' misbehavior, such as cheating in product rating,
                     affect the evaluation result? To answer these
                     questions, we present a probabilistic model to capture
                     various important factors (e.g., rating aggregation
                     rules, rating behavior) that may influence the product
                     quality assessment under the partial information
                     setting. We derive the minimum number of ratings needed
                     to produce a reliable indicator on the quality of a
                     product. We extend our model to accommodate users'
                     misbehavior in product rating. We derive the maximum
                     fraction of misbehaving users that a rating aggregation
                     rule can tolerate and the minimum number of ratings
                     needed to compensate. We carry out experiments using
                     both synthetic and real-world data (from Amazon and
                     TripAdvisor). We not only validate our model but also
                     show that the ``average rating rule'' produces more
                     reliable and robust product quality assessments than
                     the ``majority rating rule'' and the ``median rating
                     rule'' in aggregating product ratings. Last, we perform
                     experiments on two movie rating datasets (from Flixster
                     and Netflix) to demonstrate how to apply our framework
                     to improve the applications of recommender systems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {26},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Esuli:2015:OTQ,
  author          = {Andrea Esuli and Fabrizio Sebastiani},
  title           = {Optimizing Text Quantifiers for Multivariate Loss
                     Functions},
  journal         = j-TKDD,
  volume          = {9},
  number          = {4},
  pages           = {27:1--27:??},
  month           = jun,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700406},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed Jun 3 06:21:22 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We address the problem of quantification, a supervised
                     learning task whose goal is, given a class, to estimate
                     the relative frequency (or prevalence) of the class in
                     a dataset of unlabeled items. Quantification has
                     several applications in data and text mining, such as
                     estimating the prevalence of positive reviews in a set
                     of reviews of a given product or estimating the
                     prevalence of a given support issue in a dataset of
                     transcripts of phone calls to tech support. So far,
                     quantification has been addressed by learning a
                     general-purpose classifier, counting the unlabeled
                     items that have been assigned the class, and tuning the
                     obtained counts according to some heuristics. In this
                     article, we depart from the tradition of using
                     general-purpose classifiers and use instead a
                     supervised learning model for structured prediction,
                     capable of generating classifiers directly optimized
                     for the (multivariate and nonlinear) function used for
                     evaluating quantification accuracy. The experiments
                     that we have run on 5,500 binary high-dimensional
                     datasets (averaging more than 14,000 documents each)
                     show that this method is more accurate, more stable,
                     and more efficient than existing state-of-the-art
                     quantification methods.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {27},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Lin:2015:IMS,
  author =       "Bing-Rong Lin and Daniel Kifer",
  title =        "Information Measures in Statistical Privacy and Data
                 Processing Applications",
  journal =      j-TKDD,
  volume =       "9",
  number =       "4",
  pages =        "28:1--28:??",
  month =        jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2700407",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Jun 3 06:21:22 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In statistical privacy, utility refers to two
                 concepts: information preservation, how much
                 statistical information is retained by a sanitizing
                 algorithm, and usability, how (and with how much
                 difficulty) one extracts this information to build
                 statistical models, answer queries, and so forth. Some
                 scenarios incentivize a separation between information
                 preservation and usability, so that the data owner
                 first chooses a sanitizing algorithm to maximize a
                 measure of information preservation, and, afterward,
                 the data consumers process the sanitized output
                 according to their various individual needs [Ghosh et
                 al. 2009; Williams and McSherry 2010]. We analyze the
                 information-preserving properties of utility measures
                 with a combination of two new and three existing
                 utility axioms and study how violations of an axiom can
                 be fixed. We show that the average (over possible
                 outputs of the sanitizer) error of Bayesian decision
                 makers forms the unique class of utility measures that
                 satisfy all of the axioms. The axioms are agnostic to
                 Bayesian concepts such as subjective probabilities and
                 hence strengthen support for Bayesian views in privacy
                 research. In particular, this result connects
                 information preservation to aspects of usability---if the
                 information preservation of a sanitizing algorithm
                 should be measured as the average error of a Bayesian
                 decision maker, shouldn't Bayesian decision theory be a
                 good choice when it comes to using the sanitized
                 outputs for various purposes? We put this idea to the
                 test in the unattributed histogram problem where our
                 decision-theoretic postprocessing algorithm empirically
                 outperforms previously proposed approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Huang:2015:DAC,
  author          = {Hao Huang and Shinjae Yoo and Dantong Yu and Hong
                     Qin},
  title           = {Density-Aware Clustering Based on Aggregated Heat
                     Kernel and Its Transformation},
  journal         = j-TKDD,
  volume          = {9},
  number          = {4},
  pages           = {29:1--29:??},
  month           = jun,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700385},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed Jun 3 06:21:22 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Current spectral clustering algorithms suffer from the
                     sensitivity to existing noise and parameter scaling and
                     may not be aware of different density distributions
                     across clusters. If these problems are left untreated,
                     the consequent clustering results cannot accurately
                     represent true data patterns, in particular, for
                     complex real-world datasets with heterogeneous
                     densities. This article aims to solve these problems by
                     proposing a diffusion-based Aggregated Heat Kernel
                     (AHK) to improve the clustering stability, and a Local
                     Density Affinity Transformation (LDAT) to correct the
                     bias originating from different cluster densities. AHK
                     statistically models the heat diffusion traces along
                     the entire time scale, so it ensures robustness during
                     the clustering process, while LDAT probabilistically
                     reveals the local density of each instance and
                     suppresses the local density bias in the affinity
                     matrix. Our proposed framework integrates these two
                     techniques systematically. As a result, it not only
                     provides an advanced noise-resisting and density-aware
                     spectral mapping to the original dataset but also
                     demonstrates the stability during the processing of
                     tuning the scaling parameter (which usually controls
                     the range of neighborhood). Furthermore, our framework
                     works well with the majority of similarity kernels,
                     which ensures its applicability to many types of data
                     and problem domains. The systematic experiments on
                     different applications show that our proposed algorithm
                     outperforms state-of-the-art clustering algorithms for
                     the data with heterogeneous density distributions and
                     achieves robust clustering performance with respect to
                     tuning the scaling parameter and handling various
                     levels and types of noise.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {29},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Yu:2015:CSF,
  author          = {Kui Yu and Wei Ding and Dan A. Simovici and Hao Wang
                     and Jian Pei and Xindong Wu},
  title           = {Classification with Streaming Features: an
                     Emerging-Pattern Mining Approach},
  journal         = j-TKDD,
  volume          = {9},
  number          = {4},
  pages           = {30:1--30:??},
  month           = jun,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2700409},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed Jun 3 06:21:22 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Many datasets from real-world applications have very
                     high-dimensional or increasing feature space. It is a
                     new research problem to learn and maintain a classifier
                     to deal with very high dimensionality or streaming
                     features. In this article, we adapt the well-known
                     emerging-pattern--based classification models and
                     propose a semi-streaming approach. For streaming
                     features, it is computationally expensive or even
                     prohibitive to mine long-emerging patterns, and it is
                     nontrivial to integrate emerging-pattern mining with
                     feature selection. We present an online feature
                     selection step, which is capable of selecting and
                     maintaining a pool of effective features from a feature
                     stream. Then, in our offline step, separated from the
                     online step, we periodically compute and update
                     emerging patterns from the pool of selected features
                     from the online step. We evaluate the effectiveness and
                     efficiency of the proposed method using a series of
                     benchmark datasets and a real-world case study on Mars
                     crater detection. Our proposed method yields
                     classification performance comparable to the
                     state-of-art static classification methods. Most
                     important, the proposed method is significantly faster
                     and can efficiently handle datasets with streaming
                     features.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {30},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Liu:2015:SEH,
  author          = {Guimei Liu and Haojun Zhang and Mengling Feng and
                     Limsoon Wong and See-Kiong Ng},
  title           = {Supporting Exploratory Hypothesis Testing and
                     Analysis},
  journal         = j-TKDD,
  volume          = {9},
  number          = {4},
  pages           = {31:1--31:??},
  month           = jun,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2701430},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed Jun 3 06:21:22 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Conventional hypothesis testing is carried out in a
                     hypothesis-driven manner. A scientist must first
                     formulate a hypothesis based on what he or she sees and
                     then devise a variety of experiments to test it. Given
                     the rapid growth of data, it has become virtually
                     impossible for a person to manually inspect all data to
                     find all of the interesting hypotheses for testing. In
                     this article, we propose and develop a data-driven
                     framework for automatic hypothesis testing and
                     analysis. We define a hypothesis as a comparison
                     between two or more subpopulations. We find
                     subpopulations for comparison using frequent pattern
                     mining techniques and then pair them up for statistical
                     hypothesis testing. We also generate additional
                     information for further analysis of the hypotheses that
                     are deemed significant. The number of hypotheses
                     generated can be very large, and many of them are very
                     similar. We develop algorithms to remove redundant
                     hypotheses and present a succinct set of significant
                     hypotheses to users. We conducted a set of experiments
                     to show the efficiency and effectiveness of the
                     proposed algorithms. The results show that our system
                     can help users (1) identify significant hypotheses
                     efficiently, (2) isolate the reasons behind significant
                     hypotheses efficiently, and (3) find confounding
                     factors that form Simpson's paradoxes with discovered
                     significant hypotheses.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {31},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Greco:2015:PDU,
  author          = {Gianluigi Greco and Antonella Guzzo and Francesco
                     Lupia and Luigi Pontieri},
  title           = {Process Discovery under Precedence Constraints},
  journal         = j-TKDD,
  volume          = {9},
  number          = {4},
  pages           = {32:1--32:??},
  month           = jun,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2710020},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed Jun 3 06:21:22 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Process discovery has emerged as a powerful approach
                     to support the analysis and the design of complex
                     processes. It consists of analyzing a set of traces
                     registering the sequence of tasks performed along
                     several enactments of a transactional system, in order
                     to build a process model that can explain all the
                     episodes recorded over them. An approach to accomplish
                     this task is presented that can benefit from the
                     background knowledge that, in many cases, is available
                     to the analysts taking care of the process (re-)design.
                     The approach is based on encoding the information
                     gathered from the log and the (possibly) given
                     background knowledge in terms of precedence
                     constraints, that is, of constraints over the topology
                     of the resulting process models. Mining algorithms are
                     eventually formulated in terms of reasoning problems
                     over precedence constraints, and the computational
                     complexity of such problems is thoroughly analyzed by
                     tracing their tractability frontier. Solution
                     algorithms are proposed and their properties analyzed.
                     These algorithms have been implemented in a prototype
                     system, and results of a thorough experimental activity
                     are discussed.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {32},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Mirbakhsh:2015:ITR,
  author          = {Nima Mirbakhsh and Charles X. Ling},
  title           = {Improving Top-{$N$} Recommendation for Cold-Start
                     Users via Cross-Domain Information},
  journal         = j-TKDD,
  volume          = {9},
  number          = {4},
  pages           = {33:1--33:??},
  month           = jun,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2724720},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed Jun 3 06:21:22 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Making accurate recommendations for cold-start users
                     is a challenging yet important problem in
                     recommendation systems. Including more information from
                     other domains is a natural solution to improve the
                     recommendations. However, most previous work in
                     cross-domain recommendations has focused on improving
                     prediction accuracy with several severe limitations. In
                     this article, we extend our previous work on
                     clustering-based matrix factorization in single domains
                     into cross domains. In addition, we utilize recent
                     results on unobserved ratings. Our new method can more
                     effectively utilize data from auxiliary domains to
                     achieve better recommendations, especially for
                     cold-start users. For example, our method improves the
                     recall to 21\% on average for cold-start users, whereas
                     previous methods result in only 15\% recall in the
                     cross-domain Amazon dataset. We also observe almost the
                     same improvements in the Epinions dataset. Considering
                     that it is often difficult to make even a small
                     improvement in recommendations, for cold-start users in
                     particular, our result is quite significant.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {33},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Bonchi:2015:CCC,
  author          = {Francesco Bonchi and Aristides Gionis and Francesco
                     Gullo and Charalampos E. Tsourakakis and Antti
                     Ukkonen},
  title           = {Chromatic Correlation Clustering},
  journal         = j-TKDD,
  volume          = {9},
  number          = {4},
  pages           = {34:1--34:??},
  month           = jun,
  year            = {2015},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2728170},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed Jun 3 06:21:22 MDT 2015},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {We study a novel clustering problem in which the
                     pairwise relations between objects are categorical.
                     This problem can be viewed as clustering the vertices
                     of a graph whose edges are of different types (colors).
                     We introduce an objective function that ensures the
                     edges within each cluster have, as much as possible,
                     the same color. We show that the problem is NP-hard and
                     propose a randomized algorithm with approximation
                     guarantee proportional to the maximum degree of the
                     input graph. The algorithm iteratively picks a random
                     edge as a pivot, builds a cluster around it, and
                     removes the cluster from the graph. Although being
                     fast, easy to implement, and parameter-free, this
                     algorithm tends to produce a relatively large number of
                     clusters. To overcome this issue we introduce a variant
                     algorithm, which modifies how the pivot is chosen and
                     how the cluster is built around the pivot. Finally, to
                     address the case where a fixed number of output
                     clusters is required, we devise a third algorithm that
                     directly optimizes the objective function based on the
                     alternating-minimization paradigm. We also extend our
                     objective function to handle cases where object's
                     relations are described by multiple labels. We modify
                     our randomized approximation algorithm to optimize such
                     an extended objective function and show that its
                     approximation guarantee remains proportional to the
                     maximum degree of the graph. We test our algorithms on
                     synthetic and real data from the domains of social
                     media, protein-interaction networks, and bibliometrics.
                     Results reveal that our algorithms outperform a
                     baseline algorithm both in the task of reconstructing a
                     ground-truth clustering and in terms of
                     objective-function value.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {34},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2015:LSC,
  author =       "Hua Wang and Feiping Nie and Heng Huang",
  title =        "Large-Scale Cross-Language {Web} Page Classification
                 via Dual Knowledge Transfer Using Fast Nonnegative
                 Matrix Trifactorization",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2710021",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "With the rapid growth of modern technologies, Internet
                 has reached almost every corner of the world. As a
                 result, it becomes more and more important to manage
                 and mine information contained in Web pages in
                 different languages. Traditional supervised learning
                 methods usually require a large amount of training data
                 to obtain accurate and robust classification models.
                 However, labeled Web pages did not increase as fast as
                 the growth of Internet. The lack of sufficient training
                 Web pages in many languages, especially for those in
                 uncommonly used languages, makes it a challenge for
                 traditional classification algorithms to achieve
                 satisfactory performance. To address this, we observe
                 that Web pages for a same topic from different
                 languages usually share some common semantic patterns,
                 though in different representation forms. In addition,
                 we also observe that the associations between word
                 clusters and Web page classes are another type of
                 reliable carriers to transfer knowledge across
                 languages. With these recognitions, in this article we
                 propose a novel joint nonnegative matrix
                 trifactorization (NMTF) based Dual Knowledge Transfer
                 (DKT) approach for cross-language Web page
                 classification. Our approach transfers knowledge from
                 the auxiliary language, in which abundant labeled Web
                 pages are available, to the target languages, in which
                 we want to classify Web pages, through two different
                 paths: word cluster approximation and the associations
                 between word clusters and Web page classes. With the
                 reinforcement between these two different knowledge
                 transfer paths, our approach can achieve better
                 classification accuracy. In order to deal with the
                 large-scale real world data, we further develop the
                 proposed DKT approach by constraining the factor
                 matrices of NMTF to be cluster indicator matrices. Due
                 to the nature of cluster indicator matrices, we can
                 decouple the proposed optimization objective and the
                 resulted subproblems are of much smaller sizes
                 involving much less matrix multiplications, which make
                 our new approach much more computationally efficient.
                 We evaluate the proposed approach in extensive
                 experiments using a real world cross-language Web page
                 data set. Promising results have demonstrated the
                 effectiveness of our approach that are consistent with
                 our theoretical analyses.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhou:2015:SIB,
  author =       "Yang Zhou and Ling Liu",
  title =        "Social Influence Based Clustering and Optimization
                 over Heterogeneous Information Networks",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2717314",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Social influence analysis has shown great potential
                 for strategic marketing decision. It is well known that
                 people influence one another based on both their social
                 connections and the social activities that they have
                 engaged in the past. In this article, we develop an
                 innovative and high-performance social influence based
                 graph clustering framework with four unique features.
                 First, we explicitly distinguish social connection
                 based influence (self-influence) and social activity
                 based influence (co-influence). We compute the
                 self-influence similarity between two members based on
                 their social connections within a single collaboration
                 network, and compute the co-influence similarity by
                 taking into account not only the set of activities that
                 people participate but also the semantic association
                 between these activities. Second, we define the concept
                 of influence-based similarity by introducing a unified
                 influence-based similarity matrix that employs an
                 iterative weight update method to integrate
                 self-influence and co-influence similarities. Third, we
                 design a dynamic learning algorithm, called SI-Cluster,
                 for social influence based graph clustering. It
                 iteratively partitions a large social collaboration
                 network into K clusters based on both the social
                 network itself and the multiple associated activity
                 information networks, each representing a category of
                 activities that people have engaged. To make the
                 SI-Cluster algorithm converge fast, we transform
                 sophisticated nonlinear fractional programming problem
                 with respect to multiple weights into a straightforward
                 nonlinear parametric programming problem of single
                 variable. Finally, we develop an optimization technique
                 of diagonalizable-matrix approximation to speed up the
                 computation of self-influence similarity and
                 co-influence similarities. Our SI-Cluster-Opt
                 significantly improves the efficiency of SI-Cluster on
                 large graphs while maintaining high quality of
                 clustering results. Extensive experimental evaluation
                 on three real-world graphs shows that, compared to
                 existing representative graph clustering algorithms,
                 our SI-Cluster-Opt approach not only achieves a very
                 good balance between self-influence and co-influence
                 similarities but also scales extremely well for
                 clustering large graphs in terms of time complexity
                 while meeting the guarantee of high density, low
                 entropy and low Davies--Bouldin Index.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Papalexakis:2015:PSP,
  author =       "Evangelos E. Papalexakis and Christos Faloutsos and
                 Nicholas D. Sidiropoulos",
  title =        "{ParCube}: Sparse Parallelizable {CANDECOMP--PARAFAC}
                 Tensor Decomposition",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2729980",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "How can we efficiently decompose a tensor into sparse
                 factors, when the data do not fit in memory? Tensor
                 decompositions have gained a steadily increasing
                 popularity in data-mining applications; however, the
                 current state-of-art decomposition algorithms operate
                 on main memory and do not scale to truly large
                 datasets. In this work, we propose ParCube, a new and
                 highly parallelizable method for speeding up tensor
                 decompositions that is well suited to produce sparse
                 approximations. Experiments with even moderately large
                 data indicate over 90\% sparser outputs and 14 times
                 faster execution, with approximation error close to the
                 current state of the art irrespective of computation
                 and memory requirements. We provide theoretical
                 guarantees for the algorithm's correctness and we
                 experimentally validate our claims through extensive
                 experiments, including four different real world
                 datasets (Enron, Lbnl, Facebook and Nell),
                 demonstrating its effectiveness for data-mining
                 practitioners. In particular, we are the first to
                 analyze the very large Nell dataset using a sparse
                 tensor decomposition, demonstrating that ParCube
                 enables us to handle effectively and efficiently very
                 large datasets. Finally, we make our highly scalable
                 parallel implementation publicly available, enabling
                 reproducibility of our work.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ahmed:2015:AMC,
  author =       "Rezwan Ahmed and George Karypis",
  title =        "Algorithms for Mining the Coevolving Relational Motifs
                 in Dynamic Networks",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2733380",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Computational methods and tools that can efficiently
                 and effectively analyze the temporal changes in dynamic
                 complex relational networks enable us to gain
                 significant insights regarding the entity relations and
                 their evolution. This article introduces a new class of
                 dynamic graph patterns, referred to as coevolving
                 relational motifs (CRMs), which are designed to
                 identify recurring sets of entities whose relations
                 change in a consistent way over time. CRMs can provide
                 evidence to the existence of, possibly unknown,
                 coordination mechanisms by identifying the relational
                 motifs that evolve in a similar and highly conserved
                 fashion. We developed an algorithm to efficiently
                 analyze the frequent relational changes between the
                 entities of the dynamic networks and capture all
                 frequent coevolutions as CRMs. Our algorithm follows a
                 depth-first exploration of the frequent CRM lattice and
                 incorporates canonical labeling for redundancy
                 elimination. Experimental results based on multiple
                 real world dynamic networks show that the method is
                 able to efficiently identify CRMs. In addition, a
                 qualitative analysis of the results shows that the
                 discovered patterns can be used as features to
                 characterize the dynamic network.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Campello:2015:HDE,
  author =       "Ricardo J. G. B. Campello and Davoud Moulavi and
                 Arthur Zimek and J{\"o}rg Sander",
  title =        "Hierarchical Density Estimates for Data Clustering,
                 Visualization, and Outlier Detection",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2733381",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "An integrated framework for density-based cluster
                 analysis, outlier detection, and data visualization is
                 introduced in this article. The main module consists of
                 an algorithm to compute hierarchical estimates of the
                 level sets of a density, following Hartigan's classic
                 model of density-contour clusters and trees. Such an
                 algorithm generalizes and improves existing
                 density-based clustering techniques with respect to
                 different aspects. It provides as a result a complete
                 clustering hierarchy composed of all possible
                 density-based clusters following the nonparametric
                 model adopted, for an infinite range of density
                 thresholds. The resulting hierarchy can be easily
                 processed so as to provide multiple ways for data
                 visualization and exploration. It can also be further
                 postprocessed so that: (i) a normalized score of
                 ``outlierness'' can be assigned to each data object,
                 which unifies both the global and local perspectives of
                 outliers into a single definition; and (ii) a ``flat''
                 (i.e., nonhierarchical) clustering solution composed of
                 clusters extracted from local cuts through the cluster
                 tree (possibly corresponding to different density
                 thresholds) can be obtained, either in an unsupervised
                 or in a semisupervised way. In the unsupervised
                 scenario, the algorithm corresponding to this
                 postprocessing module provides a global, optimal
                 solution to the formal problem of maximizing the
                 overall stability of the extracted clusters. If
                 partially labeled objects or instance-level constraints
                 are provided by the user, the algorithm can solve the
                 problem by considering both constraints
                 violations/satisfactions and cluster stability
                 criteria. An asymptotic complexity analysis, both in
                 terms of running time and memory space, is described.
                 Experiments are reported that involve a variety of
                 synthetic and real datasets, including comparisons with
                 state-of-the-art, density-based clustering and (global
                 and local) outlier detection methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Berardi:2015:UTR,
  author =       "Giacomo Berardi and Andrea Esuli and Fabrizio
                 Sebastiani",
  title =        "Utility-Theoretic Ranking for Semiautomated Text
                 Classification",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2742548",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Semiautomated Text Classification (SATC) may be
                 defined as the task of ranking a set D of automatically
                 labelled textual documents in such a way that, if a
                 human annotator validates (i.e., inspects and corrects
                 where appropriate) the documents in a top-ranked
                 portion of D with the goal of increasing the overall
                 labelling accuracy of D, the expected increase is
                 maximized. An obvious SATC strategy is to rank D so
                 that the documents that the classifier has labelled
                 with the lowest confidence are top ranked. In this
                 work, we show that this strategy is suboptimal. We
                 develop new utility-theoretic ranking methods based on
                 the notion of validation gain, defined as the
                 improvement in classification effectiveness that would
                 derive by validating a given automatically labelled
                 document. We also propose a new effectiveness measure
                 for SATC-oriented ranking methods, based on the
                 expected reduction in classification error brought
                 about by partially validating a list generated by a
                 given ranking method. We report the results of
                 experiments showing that, with respect to the baseline
                 method mentioned earlier, and according to the proposed
                 measure, our utility-theoretic ranking methods can
                 achieve substantially higher expected reductions in
                 classification error.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yu:2015:DIP,
  author =       "Zhiwen Yu and Zhu Wang and Huilei He and Jilei Tian
                 and Xinjiang Lu and Bin Guo",
  title =        "Discovering Information Propagation Patterns in
                 Microblogging Services",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2742801",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "During the last decade, microblog has become an
                 important social networking service with billions of
                 users all over the world, acting as a novel and
                 efficient platform for the creation and dissemination
                 of real-time information. Modeling and revealing the
                 information propagation patterns in microblogging
                 services can not only lead to more accurate
                 understanding of user behaviors and provide insights
                 into the underlying sociology, but also enable useful
                 applications such as trending prediction,
                 recommendation and filtering, spam detection and viral
                 marketing. In this article, we aim to reveal the
                 information propagation patterns in Sina Weibo, the
                 biggest microblogging service in China. First, the
                 cascade of each message is represented as a tree based
                 on its retweeting process. Afterwards, we divide the
                 information propagation pattern into two levels, that
                 is, the macro level and the micro level. On one hand,
                 the macro propagation patterns refer to general
                 propagation modes that are extracted by grouping
                 propagation trees based on hierarchical clustering. On
                 the other hand, the micro propagation patterns are
                 frequent information flow patterns that are discovered
                 using tree-based mining techniques. Experimental
                 results show that several interesting patterns are
                 extracted, such as popular message propagation,
                 artificial propagation, and typical information flows
                 between different types of users.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2015:SMB,
  author =       "Xianchao Zhang and Xiaotong Zhang and Han Liu",
  title =        "Smart Multitask {Bregman} Clustering and Multitask
                 Kernel Clustering",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2747879",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Traditional clustering algorithms deal with a single
                 clustering task on a single dataset. However, there are
                 many related tasks in the real world, which motivates
                 multitask clustering. Recently some multitask
                 clustering algorithms have been proposed, and among
                 them multitask Bregman clustering (MBC) is a very
                 applicable method. MBC alternatively updates clusters
                 and learns relationships between clusters of different
                 tasks, and the two phases boost each other. However,
                 the boosting does not always have positive effects on
                 improving the clustering performance, it may also cause
                 negative effects. Another issue of MBC is that it
                 cannot deal with nonlinear separable data. In this
                 article, we show that in MBC, the process of using
                 cluster relationship to boost the cluster updating
                 phase may cause negative effects, that is, cluster
                 centroids may be skewed under some conditions. We
                 propose a smart multitask Bregman clustering (S-MBC)
                 algorithm which can identify the negative effects of
                 the boosting and avoid the negative effects if they
                 occur. We then propose a multitask kernel clustering
                 (MKC) framework for nonlinear separable data by using a
                 similar framework like MBC in the kernel space. We also
                 propose a specific optimization method, which is quite
                 different from that of MBC, to implement the MKC
                 framework. Since MKC can also cause negative effects
                 like MBC, we further extend the framework of MKC to a
                 smart multitask kernel clustering (S-MKC) framework in
                 a similar way that S-MBC is extended from MBC. We
                 conduct experiments on 10 real world multitask
                 clustering datasets to evaluate the performance of
                 S-MBC and S-MKC. The results on clustering accuracy
                 show that: (1) compared with the original MBC algorithm
                 MBC, S-MBC and S-MKC perform much better; (2) compared
                 with the convex discriminative multitask relationship
                 clustering (DMTRC) algorithms DMTRC-L and DMTRC-R which
                 also avoid negative transfer, S-MBC and S-MKC perform
                 worse in the (ideal) case in which different tasks have
                 the same cluster number and the empirical label
                 marginal distribution in each task distributes evenly,
                 but better or comparable in other (more general) cases.
                 Moreover, S-MBC and S-MKC can work on the datasets in
                 which different tasks have different number of
                 clusters, violating the assumptions of DMTRC-L and
                 DMTRC-R. The results on efficiency show that S-MBC and
                 S-MKC consume more computational time than MBC and less
                 computational time than DMTRC-L and DMTRC-R. Overall
                 S-MBC and S-MKC are competitive compared with the
                 state-of-the-art multitask clustering algorithms in
                 synthetical terms of accuracy, efficiency and
                 applicability.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wei:2015:MTP,
  author =       "Wei Wei and Kathleen M. Carley",
  title =        "Measuring Temporal Patterns in Dynamic Social
                 Networks",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2749465",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given social networks over time, how can we measure
                 network activities across different timesteps with a
                 limited number of metrics? We propose two classes of
                 dynamic metrics for assessing temporal evolution
                 patterns of agents in terms of persistency and
                 emergence. For each class of dynamic metrics, we
                 implement it using three different temporal aggregation
                 models ranging from the most commonly used Average
                 Aggregation Model to the more complex models such as
                 the Exponential Aggregation Model. We argue that the
                 problem of measuring temporal patterns can be
                 formulated using Recency and Primacy effect, which is a
                 concept used to characterize human cognitive processes.
                 Experimental results show that the way metrics model
                 Recency--Primacy effect is closely related to their
                 abilities to measure temporal patterns. Furthermore,
                 our results indicate that future network agent
                 activities can be predicted based on history
                 information using dynamic metrics. By conducting
                 multiple experiments, we are also able to find an
                 optimal length of history information that is most
                 relevant to future activities. This optimal length is
                 highly consistent within a dataset and can be used as
                 an intrinsic metric to evaluate a dynamic social
                 network.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2015:RAT,
  author =       "Siyuan Liu and Qiang Qu and Shuhui Wang",
  title =        "Rationality Analytics from Trajectories",
  journal =      j-TKDD,
  volume =       "10",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jul,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2735634",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jul 28 17:19:31 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The availability of trajectories tracking the
                 geographical locations of people as a function of time
                 offers an opportunity to study human behaviors. In this
                 article, we study rationality from the perspective of
                 user decision on visiting a point of interest (POI)
                 which is represented as a trajectory. However, the
                 analysis of rationality is challenged by a number of
                 issues, for example, how to model a trajectory in terms
                 of complex user decision processes? and how to detect
                 hidden factors that have significant impact on the
                 rational decision making? In this study, we propose
                 Rationality Analysis Model (RAM) to analyze rationality
                 from trajectories in terms of a set of impact factors.
                 In order to automatically identify hidden factors, we
                 propose a method, Collective Hidden Factor Retrieval
                 (CHFR), which can also be generalized to parse multiple
                 trajectories at the same time or parse individual
                 trajectories of different time periods. Extensive
                 experimental study is conducted on three large-scale
                 real-life datasets (i.e., taxi trajectories, user
                 shopping trajectories, and visiting trajectories in a
                 theme park). The results show that the proposed methods
                 are efficient, effective, and scalable. We also deploy
                 a system in a large theme park to conduct a field
                 study. Interesting findings and user feedback of the
                 field study are provided to support other applications
                 in user behavior mining and analysis, such as business
                 intelligence and user management for marketing
                 purposes.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Jia:2015:SGR,
  author = {Adele Lu Jia and Siqi Shen and Ruud {Van De Bovenkamp} and
    Alexandru Iosup and Fernando Kuipers and Dick H. J. Epema},
  title = {Socializing by Gaming: Revealing Social Relationships in
    Multiplayer Online Games},
  journal = j-TKDD,
  year = {2015},
  month = oct,
  volume = {10},
  number = {2},
  pages = {11:1--11:??},
  articleno = {11},
  doi = {https://doi.org/10.1145/2736698},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Multiplayer Online Games (MOGs) like Defense of the Ancients
    and StarCraft II have attracted hundreds of millions of users who
    communicate, interact, and socialize with each other through gaming.
    In MOGs, rich social relationships emerge and can be used to improve
    gaming services such as match recommendation and game population
    retention, which are important for the user experience and the
    commercial value of the companies who run these MOGs. In this work, we
    focus on understanding social relationships in MOGs. We propose a
    graph model that is able to capture social relationships of a variety
    of types and strengths. We apply our model to real-world data
    collected from three MOGs that contain in total over ten years of
    behavioral history for millions of players and matches. We compare
    social relationships in MOGs across different game genres and with
    regular online social networks like Facebook. Taking match
    recommendation as an example application of our model, we propose
    SAMRA, a Socially Aware Match Recommendation Algorithm that takes
    social relationships into account. We show that our model not only
    improves the precision of traditional link prediction approaches, but
    also potentially helps players enjoy games to a higher extent.},
}

@article{Papagelis:2015:RSG,
  author = {Manos Papagelis},
  title = {Refining Social Graph Connectivity via Shortcut Edge Addition},
  journal = j-TKDD,
  year = {2015},
  month = oct,
  volume = {10},
  number = {2},
  pages = {12:1--12:??},
  articleno = {12},
  doi = {https://doi.org/10.1145/2757281},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Small changes on the structure of a graph can have a dramatic
    effect on its connectivity. While in the traditional graph theory, the
    focus is on well-defined properties of graph connectivity, such as
    biconnectivity, in the context of a social graph, connectivity is
    typically manifested by its ability to carry on social processes. In
    this paper, we consider the problem of adding a small set of
    nonexisting edges (shortcuts) in a social graph with the main
    objective of minimizing its characteristic path length. This property
    determines the average distance between pairs of vertices and
    essentially controls how broadly information can propagate through a
    network. We formally define the problem of interest, characterize its
    hardness and propose a novel method, path screening, which quickly
    identifies important shortcuts to guide the augmentation of the graph.
    We devise a sampling-based variant of our method that can scale up the
    computation in larger graphs. The claims of our methods are formally
    validated. Through experiments on real and synthetic data, we
    demonstrate that our methods are a multitude of times faster than
    standard approaches, their accuracy outperforms sensible baselines and
    they can ease the spread of information in a network, for a varying
    range of conditions.},
}

@Article{Hong:2015:CAR,
  author =       "Liang Hong and Lei Zou and Cheng Zeng and Luming Zhang
                 and Jian Wang and Jilei Tian",
  title =        "Context-Aware Recommendation Using Role-Based Trust
                 Network",
  journal =      j-TKDD,
  volume =       "10",
  number =       "2",
  pages =        "13:1--13:??",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2751562",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Oct 26 17:19:18 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Recommender systems have been studied comprehensively
                 in both academic and industrial fields over the past
                 decade. As user interests can be affected by context at
                 any time and any place in mobile scenarios, rich
                 context information becomes more and more important for
                 personalized context-aware recommendations. Although
                 existing context-aware recommender systems can make
                 context-aware recommendations to some extent, they
                 suffer several inherent weaknesses: (1) Users'
                 context-aware interests are not modeled realistically,
                 which reduces the recommendation quality; (2) Current
                 context-aware recommender systems ignore trust
                 relations among users. Trust relations are actually
                 context-aware and associated with certain aspects
                 (i.e., categories of items) in mobile scenarios. In
                 this article, we define a term role to model common
                 context-aware interests among a group of users. We
                 propose an efficient role mining algorithm to mine
                 roles from a ``user-context-behavior'' matrix, and a
                 role-based trust model to calculate context-aware trust
                 value between two users. During online recommendation,
                 given a user $u$ in a context $c$, an efficient
                 weighted set similarity query (WSSQ) algorithm is
                 designed to build $u$'s role-based trust network in
                 context $c$. Finally, we make recommendations to $u$
                 based on $u$'s role-based trust network by considering
                 both context-aware roles and trust relations. Extensive
                 experiments demonstrate that our recommendation
                 approach outperforms the state-of-the-art methods in
                 both effectiveness and efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Zhang:2015:OBF,
  author = {Lei Zhang and Ping Luo and Linpeng Tang and Enhong Chen and
    Qi Liu and Min Wang and Hui Xiong},
  title = {Occupancy-Based Frequent Pattern Mining},
  journal = j-TKDD,
  year = {2015},
  month = oct,
  volume = {10},
  number = {2},
  pages = {14:1--14:??},
  articleno = {14},
  doi = {https://doi.org/10.1145/2753765},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Frequent pattern mining is an important data mining problem
    with many broad applications. Most studies in this field use support
    (frequency) to measure the popularity of a pattern, namely the
    fraction of transactions or sequences that include the pattern in a
    data set. In this study, we introduce a new interesting measure,
    namely occupancy, to measure the completeness of a pattern in its
    supporting transactions or sequences. This is motivated by some
    real-world pattern recommendation applications in which an interesting
    pattern should not only be frequent, but also occupies a large portion
    of its supporting transactions or sequences. With the definition of
    occupancy we call a pattern dominant if its occupancy value is above a
    user-specified threshold. Then, our task is to identify the qualified
    patterns which are both dominant and frequent. Also, we formulate the
    problem of mining top-k qualified patterns, that is, finding k
    qualified patterns with maximum values on a user-defined function of
    support and occupancy, for example, weighted sum of support and
    occupancy. The challenge to these tasks is that the value of occupancy
    does not change monotonically when more items are appended to a given
    pattern. Therefore, we propose a general algorithm called DOFRA
    (DOminant and FRequent pattern mining Algorithm) for mining these
    qualified patterns, which explores the upper bound properties on
    occupancy to drastically reduce the search process. Finally, we show
    the effectiveness of DOFRA in two real-world applications and also
    demonstrate the efficiency of DOFRA on several real and large
    synthetic datasets.},
}

@article{Chen:2015:AAS,
  author = {Hung-Hsuan Chen and C. Lee Giles},
  title = {{ASCOS++}: an Asymmetric Similarity Measure for Weighted
    Networks to Address the Problem of {SimRank}},
  journal = j-TKDD,
  year = {2015},
  month = oct,
  volume = {10},
  number = {2},
  pages = {15:1--15:??},
  articleno = {15},
  doi = {https://doi.org/10.1145/2776894},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/pagerank.bib;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {In this article, we explore the relationships among digital
    objects in terms of their similarity based on vertex similarity
    measures. We argue that SimRank --- a famous similarity measure ---
    and its families, such as P-Rank and SimRank++, fail to capture
    similar node pairs in certain conditions, especially when two nodes
    can only reach each other through paths of odd lengths. We present new
    similarity measures ASCOS and ASCOS++ to address the problem. ASCOS
    outputs a more complete similarity score than SimRank and SimRank's
    families. ASCOS++ enriches ASCOS to include edge weight into the
    measure, giving all edges and network weights an opportunity to make
    their contribution. We show that both ASCOS++ and ASCOS can be
    reformulated and applied on a distributed environment for parallel
    contribution. Experimental results show that ASCOS++ reports a better
    score than SimRank and several famous similarity measures. Finally, we
    re-examine previous use cases of SimRank, and explain appropriate and
    inappropriate use cases. We suggest future SimRank users following the
    rules proposed here before na{\"\i}vely applying it. We also discuss
    the relationship between ASCOS++ and PageRank.},
}

@article{Zafarani:2015:UIA,
  author = {Reza Zafarani and Lei Tang and Huan Liu},
  title = {User Identification Across Social Media},
  journal = j-TKDD,
  year = {2015},
  month = oct,
  volume = {10},
  number = {2},
  pages = {16:1--16:??},
  articleno = {16},
  doi = {https://doi.org/10.1145/2747880},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {People use various social media sites for different purposes.
    The information on each site is often partial. When sources of
    complementary information are integrated, a better profile of a user
    can be built. This profile can help improve online services such as
    advertising across sites. To integrate these sources of information,
    it is necessary to identify individuals across social media sites.
    This paper aims to address the cross-media user identification
    problem. We provide evidence on the existence of a mapping among
    identities of individuals across social media sites, study the
    feasibility of finding this mapping, and illustrate and develop means
    for finding this mapping. Our studies show that effective approaches
    that exploit information redundancies due to users' unique behavioral
    patterns can be utilized to find such a mapping. This study paves the
    way for analysis and mining across social networking sites, and
    facilitates the creation of novel online services across sites. In
    particular, recommending friends and advertising across networks,
    analyzing information diffusion across sites, and studying specific
    user behavior such as user migration across sites in social media are
    one of the many areas that can benefit from the results of this
    study.},
}

@article{Li:2015:RUC,
  author = {Lei Li and Wei Peng and Saurabh Kataria and Tong Sun and Tao
    Li},
  title = {Recommending Users and Communities in Social Media},
  journal = j-TKDD,
  year = {2015},
  month = oct,
  volume = {10},
  number = {2},
  pages = {17:1--17:??},
  articleno = {17},
  doi = {https://doi.org/10.1145/2757282},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Social media has become increasingly prevalent in the last few
    years, not only enabling people to connect with each other by social
    links, but also providing platforms for people to share information
    and interact over diverse topics. Rich user-generated information, for
    example, users' relationships and daily posts, are often available in
    most social media service websites. Given such information, a
    challenging problem is to provide reasonable user and community
    recommendation for a target user, and consequently, help the target
    user engage in the daily discussions and activities with his/her
    friends or like-minded people. In this article, we propose a unified
    framework of recommending users and communities that utilizes the
    information in social media. Given a user's profile or a set of
    keywords as input, our framework is capable of recommending
    influential users and topic-cohesive interactive communities that are
    most relevant to the given user or keywords. With the proposed
    framework, users can find other individuals or communities sharing
    similar interests, and then have more interaction with these users or
    within the communities. We present a generative topic model to
    discover user-oriented and community-oriented topics simultaneously,
    which enables us to capture the exact topical interests of users, as
    well as the focuses of communities. Extensive experimental evaluation
    and case studies on a dataset collected from Twitter demonstrate the
    effectiveness of our proposed framework compared with other
    probabilistic-topic-model-based recommendation methods.},
}

@article{Yu:2015:GGA,
  author = {Rose Yu and Xinran He and Yan Liu},
  title = {{GLAD}: Group Anomaly Detection in Social Media Analysis},
  journal = j-TKDD,
  year = {2015},
  month = oct,
  volume = {10},
  number = {2},
  pages = {18:1--18:??},
  articleno = {18},
  doi = {https://doi.org/10.1145/2811268},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Traditional anomaly detection on social media mostly focuses
    on individual point anomalies while anomalous phenomena usually occur
    in groups. Therefore, it is valuable to study the collective behavior
    of individuals and detect group anomalies. Existing group anomaly
    detection approaches rely on the assumption that the groups are known,
    which can hardly be true in real world social media applications. In
    this article, we take a generative approach by proposing a
    hierarchical Bayes model: Group Latent Anomaly Detection (GLAD) model.
    GLAD takes both pairwise and point-wise data as input, automatically
    infers the groups and detects group anomalies simultaneously. To
    account for the dynamic properties of the social media data, we
    further generalize GLAD to its dynamic extension d-GLAD. We conduct
    extensive experiments to evaluate our models on both synthetic and
    real world datasets. The empirical results demonstrate that our
    approach is effective and robust in discovering latent groups and
    detecting group anomalies.},
}

@Article{Chakrabarti:2015:BPL,
  author =       "Aniket Chakrabarti and Venu Satuluri and Atreya
                 Srivathsan and Srinivasan Parthasarathy",
  title =        "A {Bayesian} Perspective on Locality Sensitive Hashing
                 with Extensions for Kernel Methods",
  journal =      j-TKDD,
  volume =       "10",
  number =       "2",
  pages =        "19:1--19:??",
  month =        oct,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2778990",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Oct 26 17:19:18 MDT 2015",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/hash.bib;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a collection of objects and an associated
                 similarity measure, the all-pairs similarity search
                 problem asks us to find all pairs of objects with
                 similarity greater than a certain user-specified
                 threshold. In order to reduce the number of candidates
                 to search, locality-sensitive hashing (LSH) based
                 indexing methods are very effective. However, most such
                 methods only use LSH for the first phase of similarity
                 search --- that is, efficient indexing for candidate
                 generation. In this article, we present BayesLSH, a
                 principled Bayesian algorithm for the subsequent phase
                 of similarity search --- performing candidate pruning
                 and similarity estimation using LSH. A simpler variant,
                 BayesLSH-Lite, which calculates similarities exactly,
                 is also presented. Our algorithms are able to quickly
                 prune away a large majority of the false positive
                 candidate pairs, leading to significant speedups over
                 baseline approaches. For BayesLSH, we also provide
                 probabilistic guarantees on the quality of the output,
                 both in terms of accuracy and recall. Finally, the
                 quality of BayesLSH's output can be easily tuned and
                 does not require any manual setting of the number of
                 hashes to use for similarity estimation, unlike
                 standard approaches. For two state-of-the-art candidate
                 generation algorithms, AllPairs and LSH, BayesLSH
                 enables significant speedups, typically in the range
                 $ 2 \times $--$ 20 \times $ for a wide variety of
                 datasets. We also extend the BayesLSH algorithm for
                 kernel methods --- in which the similarity between two
                 data objects is defined by a kernel function. Since the
                 embedding of data points in the transformed kernel
                 space is unknown, algorithms such as AllPairs which
                 rely on building inverted index structure for fast
                 similarity search do not work with kernel functions.
                 Exhaustive search across all possible pairs is also not
                 an option since the dataset can be huge and computing
                 the kernel values for each pair can be prohibitive. We
                 propose K-BayesLSH an all-pairs similarity search
                 problem for kernel functions. K-BayesLSH leverages a
                 recently proposed idea --- kernelized locality
                 sensitive hashing (KLSH) --- for hash bit computation
                 and candidate generation, and uses the aforementioned
                 BayesLSH idea for candidate pruning and similarity
                 estimation. We ran a broad spectrum of experiments on a
                 variety of datasets drawn from different domains and
                 with distinct kernels and find a speedup of
                 $ 2 \times $--$ 7 \times $ over vanilla KLSH.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Zhang:2015:DAV,
  author = {Yao Zhang and B. Aditya Prakash},
  title = {Data-Aware Vaccine Allocation Over Large Networks},
  journal = j-TKDD,
  year = {2015},
  month = oct,
  volume = {10},
  number = {2},
  pages = {20:1--20:??},
  articleno = {20},
  doi = {https://doi.org/10.1145/2803176},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Oct 26 17:19:18 MDT 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Given a graph, like a social/computer network or the
    blogosphere, in which an infection (or meme or virus) has been
    spreading for some time, how to select the k best nodes for
    immunization/quarantining immediately? Most previous works for
    controlling propagation (say via immunization) have concentrated on
    developing strategies for vaccination preemptively before the start of
    the epidemic. While very useful to provide insights in to which
    baseline policies can best control an infection, they may not be ideal
    to make real-time decisions as the infection is progressing. In this
    paper, we study how to immunize healthy nodes, in the presence of
    already infected nodes. Efficient algorithms for such a problem can
    help public-health experts make more informed choices, tailoring their
    decisions to the actual distribution of the epidemic on the ground.
    First we formulate the Data-Aware Vaccination problem, and prove it is
    NP-hard and also that it is hard to approximate. Secondly, we propose
    three effective polynomial-time heuristics DAVA, DAVA-prune and
    DAVA-fast, of varying degrees of efficiency and performance. Finally,
    we also demonstrate the scalability and effectiveness of our
    algorithms through extensive experiments on multiple real networks
    including large epidemiology datasets (containing millions of
    interactions). Our algorithms show substantial gains of up to ten
    times more healthy nodes at the end against many other intuitive and
    nontrivial competitors.},
}

@article{Rowe:2016:MUD,
  author = {Matthew Rowe},
  title = {Mining User Development Signals for Online Community Churner
    Detection},
  journal = j-TKDD,
  year = {2016},
  month = feb,
  volume = {10},
  number = {3},
  pages = {21:1--21:??},
  articleno = {21},
  doi = {https://doi.org/10.1145/2798730},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Thu Feb 25 05:56:34 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Churners are users who stop using a given service after
    previously signing up. In the domain of telecommunications and video
    games, churners represent a loss of revenue as a user leaving
    indicates that they will no longer pay for the service. In the context
    of online community platforms (e.g., community message boards, social
    networking sites, question--answering systems, etc.), the churning of
    a user can represent different kinds of loss: of social capital, of
    expertise, or of a vibrant individual who is a mediator for
    interaction and communication. Detecting which users are likely to
    churn from online communities, therefore, enables community managers
    to offer incentives to entice those users back; as retention is less
    expensive than re-signing users up. In this article, we tackle the
    task of detecting churners on four online community platforms by
    mining user development signals. These signals explain how users have
    evolved along different dimensions (i.e., social and lexical) relative
    to their prior behaviour and the community in which they have
    interacted. We present a linear model, based upon elastic-net
    regularisation, that uses extracted features from the signals to
    detect churners. Our evaluation of this model against several state of
    the art baselines, including our own prior work, empirically
    demonstrates the superior performance that this approach achieves for
    several experimental settings. This article presents a novel approach
    to churn prediction that takes a different route from existing
    approaches that are based on measuring static social network
    properties of users (e.g., centrality, in-degree, etc.).},
}

@Article{Prat-Perez:2016:PTT,
  author          = {Arnau Prat-P{\'e}rez and David Dominguez-Sal and
                     Josep-M. Brunat and Josep-Lluis Larriba-Pey},
  title           = {Put Three and Three Together: Triangle-Driven
                     Community Detection},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {22:1--22:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2775108},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Community detection has arisen as one of the most
                     relevant topics in the field of graph data mining due
                     to its applications in many fields such as biology,
                     social networks, or network traffic analysis. Although
                     the existing metrics used to quantify the quality of a
                     community work well in general, under some
                     circumstances, they fail at correctly capturing such
                     notion. The main reason is that these metrics consider
                     the internal community edges as a set, but ignore how
                     these actually connect the vertices of the community.
                     We propose the Weighted Community Clustering (WCC),
                     which is a new community metric that takes the triangle
                     instead of the edge as the minimal structural motif
                     indicating the presence of a strong relation in a
                     graph. We theoretically analyse WCC in depth and
                     formally prove, by means of a set of properties, that
                     the maximization of WCC guarantees communities with
                     cohesion and structure. In addition, we propose
                     Scalable Community Detection (SCD), a community
                     detection algorithm based on WCC, which is designed to
                     be fast and scalable on SMP machines, showing
                     experimentally that WCC correctly captures the concept
                     of community in social networks using real datasets.
                     Finally, using ground-truth data, we show that SCD
                     provides better quality than the best disjoint
                     community detection algorithms of the state of the art
                     while performing faster.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {22},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Guo:2016:MDM,
  author          = {Zhen Guo and Zhongfei (Mark) Zhang and Eric P. Xing
                     and Christos Faloutsos},
  title           = {Multimodal Data Mining in a Multimedia Database Based
                     on Structured Max Margin Learning},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {23:1--23:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2742549},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Mining knowledge from a multimedia database has
                     received increasing attentions recently since huge
                     repositories are made available by the development of
                     the Internet. In this article, we exploit the relations
                     among different modalities in a multimedia database and
                     present a framework for general multimodal data mining
                     problem where image annotation and image retrieval are
                     considered as the special cases. Specifically, the
                     multimodal data mining problem can be formulated as a
                     structured prediction problem where we learn the
                     mapping from an input to the structured and
                     interdependent output variables. In addition, in order
                     to reduce the demanding computation, we propose a new
                     max margin structure learning approach called Enhanced
                     Max Margin Learning (EMML) framework, which is much
                     more efficient with a much faster convergence rate than
                     the existing max margin learning methods, as verified
                     through empirical evaluations. Furthermore, we apply
                     EMML framework to develop an effective and efficient
                     solution to the multimodal data mining problem that is
                     highly scalable in the sense that the query response
                     time is independent of the database scale. The EMML
                     framework allows an efficient multimodal data mining
                     query in a very large scale multimedia database, and
                     excels many existing multimodal data mining methods in
                     the literature that do not scale up at all. The
                     performance comparison with a state-of-the-art
                     multimodal data mining method is reported for the
                     real-world image databases.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {23},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Myers:2016:DAK,
  author =       "Risa B. Myers and John C. Frenzel and Joseph R. Ruiz
                 and Christopher M. Jermaine",
  title =        "Do Anesthesiologists Know What They Are Doing?
                 {Mining} a Surgical Time-Series Database to Correlate
                 Expert Assessment with Outcomes",
  journal =      j-TKDD,
  volume =       "10",
  number =       "3",
  pages =        "24:1--24:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2822897",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Feb 25 05:56:34 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Anesthesiologists are taught to carefully manage
                 patient vital signs during surgery. Unfortunately,
                 there is little empirical evidence that vital sign
                 management, as currently practiced, is correlated with
                 patient outcomes. We seek to validate or repudiate
                 current clinical practice and determine whether or not
                 clinician evaluation of surgical vital signs correlate
                 with outcomes. Using a database of over 90,000 cases,
                 we attempt to determine whether those cases that
                 anesthesiologists would subjectively decide are ``low
                 quality'' are more likely to result in negative
                 outcomes. The problem reduces to one of
                 multi-dimensional time-series classification. Our
                 approach is to have a set of expert anesthesiologists
                 independently label a small number of training cases,
                 from which we build classifiers and label all 90,000
                 cases. We then use the labeling to search for
                 correlation with outcomes and compare the prevalence of
                 important 30-day outcomes between providers. To mimic
                 the providers' quality labels, we consider several
                 standard classification methods, such as dynamic time
                 warping in conjunction with a kNN classifier, as well
                 as complexity invariant distance, and a regression
                 based upon the feature extraction methods outlined by
                 Mao et al. 2012 (using features such as time-series
                 mean, standard deviation, skew, etc.). We also propose
                 a new feature selection mechanism that learns a hidden
                 Markov model to segment the time series; the fraction
                 of time that each series spends in each state is used
                 to label the series using a regression-based
                 classifier. In the end, we obtain strong, empirical
                 evidence that current best practice is correlated with
                 reduced negative patient outcomes. We also learn that
                 all of the experts were able to significantly separate
                 cases by outcome, with higher prevalence of negative
                 30-day outcomes in the cases labeled as ``low quality''
                 for almost all of the outcomes investigated.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "24",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Namata:2016:CGI,
  author          = {Galileo Mark Namata and Ben London and Lise Getoor},
  title           = {Collective Graph Identification},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {25:1--25:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2818378},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Data describing networks---such as communication
                     networks, transaction networks, disease transmission
                     networks, collaboration networks, etc.---are becoming
                     increasingly available. While observational data can be
                     useful, it often only hints at the actual underlying
                     process that governs interactions and attributes. For
                     example, an email communication network provides
                     insight into its users and their relationships, but is
                     not the same as the ``real'' underlying social network.
                     In this article, we introduce the problem of graph
                     identification, i.e., discovering the latent graph
                     structure underlying an observed network. We cast the
                     problem as a probabilistic inference task, in which we
                     must infer the nodes, edges, and node labels of a
                     hidden graph, based on evidence. This entails solving
                     several canonical problems in network analysis: entity
                     resolution (determining when two observations
                     correspond to the same entity), link prediction
                     (inferring the existence of links), and node labeling
                     (inferring hidden attributes). While each of these
                     subproblems has been well studied in isolation, here we
                     consider them as a single, collective task. We present
                     a simple, yet novel, approach to address all three
                     subproblems simultaneously. Our approach, which we
                     refer to as C$^3$, consists of a collection of Coupled
                     Collective Classifiers that are applied iteratively to
                     propagate inferred information among the subproblems.
                     We consider variants of C$^3$ using different learning
                     and inference techniques and empirically demonstrate
                     that C$^3$ is superior, both in terms of predictive
                     accuracy and running time, to state-of-the-art
                     probabilistic approaches on four real problems.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {25},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Subbian:2016:MIU,
  author          = {Karthik Subbian and Charu Aggarwal and Jaideep
                     Srivastava},
  title           = {Mining Influencers Using Information Flows in Social
                     Streams},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {26:1--26:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2815625},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {The problem of discovering information flow trends in
                     social networks has become increasingly relevant due to
                     the increasing amount of content in online social
                     networks, and its relevance as a tool for research into
                     the content trends analysis in the network. An
                     important part of this analysis is to determine the key
                     patterns of flow in the underlying network. Almost all
                     the work in this area has focused on fixed models of
                     the network structure, and edge-based transmission
                     between nodes. In this article, we propose a fully
                     content-centered model of flow analysis in networks, in
                     which the analysis is based on actual content
                     transmissions in the underlying social stream, rather
                     than a static model of transmission on the edges.
                     First, we introduce the problem of influence analysis
                     in the context of information flow in networks. We then
                     propose a novel algorithm InFlowMine to discover the
                     information flow patterns in the network and
                     demonstrate the effectiveness of the discovered
                     information flows using an influence mining
                     application. This application illustrates the
                     flexibility and effectiveness of our information flow
                     model to find topic- or network-specific influencers,
                     or their combinations. We empirically show that our
                     information flow mining approach is effective and
                     efficient than the existing methods on a number of
                     different measures.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {26},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Angiulli:2016:TGU,
  author          = {Fabrizio Angiulli and Fabio Fassetti},
  title           = {Toward Generalizing the Unification with Statistical
                     Outliers: The Gradient Outlier Factor Measure},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {27:1--27:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2829956},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {In this work, we introduce a novel definition of
                     outlier, namely the Gradient Outlier Factor (or GOF),
                     with the aim to provide a definition that unifies with
                     the statistical one on some standard distributions but
                     has a different behavior in the presence of mixture
                     distributions. Intuitively, the GOF score measures the
                     probability to stay in the neighborhood of a certain
                     object. It is directly proportional to the density and
                     inversely proportional to the variation of the density.
                     We derive formal properties under which the GOF
                     definition unifies the statistical outlier definition
                     and show that the unification holds for some standard
                     distributions, while the GOF is able to capture tails
                     in the presence of different distributions even if
                     their densities sensibly differ. Moreover, we provide a
                     probabilistic interpretation of the GOF score, by means
                     of the notion of density of the data density.
                     Experimental results confirm that there are scenarios
                     in which the novel definition can be profitably
                     employed. To the best of our knowledge, except for
                     distance-based outlier, no other data mining outlier
                     definition has a so clearly established relationship
                     with statistical outliers.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {27},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Koutra:2016:DPM,
  author =       "Danai Koutra and Neil Shah and Joshua T. Vogelstein
                 and Brian Gallagher and Christos Faloutsos",
  title =        "{DeltaCon}: Principled Massive-Graph Similarity
                 Function with Attribution",
  journal =      j-TKDD,
  volume =       "10",
  number =       "3",
  pages =        "28:1--28:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2824443",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Feb 25 05:56:34 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "How much has a network changed since yesterday? How
                 different is the wiring of Bob's brain (a left-handed
                 male) and Alice's brain (a right-handed female), and
                 how is it different? Graph similarity with given node
                 correspondence, i.e., the detection of changes in the
                 connectivity of graphs, arises in numerous settings. In
                 this work, we formally state the axioms and desired
                 properties of the graph similarity functions, and
                 evaluate when state-of-the-art methods fail to detect
                 crucial connectivity changes in graphs. We propose
                 DeltaCon, a principled, intuitive, and scalable
                 algorithm that assesses the similarity between two
                 graphs on the same nodes (e.g., employees of a company,
                 customers of a mobile carrier). In conjunction, we
                 propose DeltaCon-Attr, a related approach that enables
                 attribution of change or dissimilarity to responsible
                 nodes and edges. Experiments on various synthetic and
                 real graphs showcase the advantages of our method over
                 existing similarity measures. Finally, we employ
                 DeltaCon and DeltaCon-Attr on real applications: (a) we
                 classify people to groups of high and low creativity
                 based on their brain connectivity graphs, (b) do
                 temporal anomaly detection in the who-emails-whom Enron
                 graph and find the top culprits for the changes in the
                 temporal corporate email graph, and (c) recover pairs
                 of test-retest large brain scans ($\sim$17M edges, up
                 to 90M edges) for 21 subjects.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhao:2016:MPA,
  author =       "Wayne Xin Zhao and Jinpeng Wang and Yulan He and
                 Ji-Rong Wen and Edward Y. Chang and Xiaoming Li",
  title =        "Mining Product Adopter Information from Online Reviews
                 for Improving Product Recommendation",
  journal =      j-TKDD,
  volume =       "10",
  number =       "3",
  pages =        "29:1--29:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2842629",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Feb 25 05:56:34 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We present in this article an automated framework that
                 extracts product adopter information from online
                 reviews and incorporates the extracted information into
                 feature-based matrix factorization for more effective
                 product recommendation. In specific, we propose a
                 bootstrapping approach for the extraction of product
                 adopters from review text and categorize them into a
                 number of different demographic categories. The
                 aggregated demographic information of many product
                 adopters can be used to characterize both products and
                 users in the form of distributions over different
                 demographic categories. We further propose a
                 graph-based method to iteratively update user- and
                 product-related distributions more reliably in a
                 heterogeneous user--product graph and incorporate them
                 as features into the matrix factorization approach for
                 product recommendation. Our experimental results on a
                 large dataset crawled from JingDong, the largest B2C
                 e-commerce website in China, show that our proposed
                 framework outperforms a number of competitive baselines
                 for product recommendation.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "29",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Duarte:2016:AMR,
  author          = {Jo{\~a}o Duarte and Jo{\~a}o Gama and Albert Bifet},
  title           = {Adaptive Model Rules From High-Speed Data Streams},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {30:1--30:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2829955},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Decision rules are one of the most expressive and
                     interpretable models for machine learning. In this
                     article, we present Adaptive Model Rules (AMRules), the
                     first stream rule learning algorithm for regression
                     problems. In AMRules, the antecedent of a rule is a
                     conjunction of conditions on the attribute values, and
                     the consequent is a linear combination of the
                     attributes. In order to maintain a regression model
                     compatible with the most recent state of the process
                     generating data, each rule uses a Page-Hinkley test to
                     detect changes in this process and react to changes by
                     pruning the rule set. Online learning might be strongly
                     affected by outliers. AMRules is also equipped with
                     outliers detection mechanisms to avoid model adaption
                     using anomalous examples. In the experimental section,
                     we report the results of AMRules on benchmark
                     regression problems, and compare the performance of our
                     system with other streaming regression algorithms.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {30},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Lu:2016:SCB,
  author          = {Faming Lu and Qingtian Zeng and Hua Duan},
  title           = {Synchronization-Core-Based Discovery of Processes with
                     Decomposable Cyclic Dependencies},
  journal         = j-TKDD,
  volume          = {10},
  number          = {3},
  pages           = {31:1--31:??},
  month           = feb,
  year            = {2016},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/2845086},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Thu Feb 25 05:56:34 MST 2016},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                     http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract        = {Traditional process discovery techniques mine process
                     models based upon event traces giving little
                     consideration to workflow relevant data recorded in
                     event logs. The neglect of such information usually
                     leads to incorrect discovered models, especially when
                     activities have decomposable cyclic dependencies. To
                     address this problem, the recorded workflow relevant
                     data and decision tree learning technique are utilized
                     to classify cases into case clusters. Each case cluster
                     contains causality and concurrency activity
                     dependencies only. Then, a set of activity ordering
                     relations are derived based on case clusters. And a
                     synchronization-core-based process model is discovered
                     from the ordering relations and composite cases.
                     Finally, the discovered model is transformed to a BPMN
                     model. The proposed approach is validated with a
                     medical treatment process and an open event log.
                     Meanwhile, a prototype system is presented.},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {31},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Liu:2016:EAW,
  author =       "Yashu Liu and Jie Wang and Jieping Ye",
  title =        "An Efficient Algorithm For Weak Hierarchical Lasso",
  journal =      j-TKDD,
  volume =       "10",
  number =       "3",
  pages =        "32:1--32:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2791295",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Feb 25 05:56:34 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Linear regression is a widely used tool in data mining
                 and machine learning. In many applications, fitting a
                 regression model with only linear effects may not be
                 sufficient for predictive or explanatory purposes. One
                 strategy that has recently received increasing
                 attention in statistics is to include feature
                 interactions to capture the nonlinearity in the
                 regression model. Such model has been applied
                 successfully in many biomedical applications. One major
                 challenge in the use of such model is that the data
                 dimensionality is significantly higher than the
                 original data, resulting in the small sample size large
                 dimension problem. Recently, weak hierarchical Lasso, a
                 sparse interaction regression model, is proposed that
                 produces a sparse and hierarchical structured estimator
                 by exploiting the Lasso penalty and a set of
                 hierarchical constraints. However, the hierarchical
                 constraints make it a non-convex problem and the
                 existing method finds the solution to its convex
                 relaxation, which needs additional conditions to
                 guarantee the hierarchical structure. In this article,
                 we propose to directly solve the non-convex weak
                 hierarchical Lasso by making use of the General
                 Iterative Shrinkage and Thresholding (GIST)
                 optimization framework, which has been shown to be
                 efficient for solving non-convex sparse formulations.
                 The key step in GIST is to compute a sequence of
                 proximal operators. One of our key technical
                 contributions is to show that the proximal operator
                 associated with the non-convex weak hierarchical Lasso
                 admits a closed-form solution. However, a naive
                 approach for solving each subproblem of the proximal
                 operator leads to a quadratic time complexity, which is
                 not desirable for large-size problems. To this end, we
                 further develop an efficient algorithm for computing
                 the subproblems with a linearithmic time complexity. We
                 have conducted extensive experiments on both synthetic
                 and real datasets. Results show that our proposed
                 algorithm is much more efficient and effective than its
                 convex relaxation. In addition, we extend the technique
                 to perform the optimization-based hierarchical testing
                 of pairwise interactions for binary classification
                 problems, which is essentially the proximal operator
                 associated with weak hierarchical Lasso. Simulation
                 studies show that the non-convex hierarchical testing
                 framework outperforms the convex relaxation when a
                 hierarchical structure exists between main effects and
                 interactions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "32",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2016:ISI,
  author =       "Wei Wang and Jure Leskovec",
  title =        "Introduction to the Special Issue of Best Papers in
                 {ACM SIGKDD 2014}",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "33:1--33:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2936718",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "33",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xu:2016:PSP,
  author =       "Silei Xu and John C. S. Lui",
  title =        "Product Selection Problem: Improve Market Share by
                 Learning Consumer Behavior",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "34:1--34:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2753764",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "It is often crucial for manufacturers to decide what
                 products to produce so that they can increase their
                 market share in an increasingly fierce market. To
                 decide which products to produce, manufacturers need to
                 analyze the consumers' requirements and how consumers
                 make their purchase decisions so that the new products
                 will be competitive in the market. In this paper, we
                 first present a general distance-based product adoption
                 model to capture consumers' purchase behavior. Using
                 this model, various distance metrics can be used to
                 describe different real life purchase behavior. We then
                 provide a learning algorithm to decide which set of
                 distance metrics one should use when we are given some
                 accessible historical purchase data. Based on the
                 product adoption model, we formalize the $k$ most
                 marketable products (or $k$-MMP) selection problem and
                 formally prove that the problem is NP-hard. To tackle
                 this problem, we propose an efficient greedy-based
                 approximation algorithm with a provable solution
                 guarantee. Using submodularity analysis, we prove that
                 our approximation algorithm can achieve at least 63\%
                 of the optimal solution. We apply our algorithm on both
                 synthetic datasets and real-world datasets
                 (TripAdvisor.com), and show that our algorithm can
                 easily achieve five or more orders of speedup over the
                 exhaustive search and achieve about 96\% of the optimal
                 solution on average. Our experiments also demonstrate
                 the robustness of our distance metric learning method,
                 and illustrate how one can adopt it to improve the
                 accuracy of product selection.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "34",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jiang:2016:CSB,
  author =       "Meng Jiang and Peng Cui and Alex Beutel and Christos
                 Faloutsos and Shiqiang Yang",
  title =        "Catching Synchronized Behaviors in Large Networks: a
                 Graph Mining Approach",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "35:1--35:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2746403",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a directed graph of millions of nodes, how can
                 we automatically spot anomalous, suspicious nodes
                 judging only from their connectivity patterns?
                 Suspicious graph patterns show up in many applications,
                 from Twitter users who buy fake followers, manipulating
                 the social network, to botnet members performing
                 distributed denial of service attacks, disturbing the
                 network traffic graph. We propose a fast and effective
                 method, CatchSync, which exploits two of the tell-tale
                 signs left in graphs by fraudsters: (a) synchronized
                 behavior: suspicious nodes have extremely similar
                 behavior patterns because they are often required to
                 perform some task together (such as follow the same
                 user); and (b) rare behavior: their connectivity
                 patterns are very different from the majority. We
                 introduce novel measures to quantify both concepts
                 (``synchronicity'' and ``normality'') and we propose a
                 parameter-free algorithm that works on the resulting
                 synchronicity-normality plots. Thanks to careful
                 design, CatchSync has the following desirable
                 properties: (a) it is scalable to large datasets, being
                 linear in the graph size; (b) it is parameter free; and
                 (c) it is side-information-oblivious: it can operate
                 using only the topology, without needing labeled data,
                 nor timing information, and the like, while still
                 capable of using side information if available. We
                 applied CatchSync on three large, real datasets,
                 1-billion-edge Twitter social graph, 3-billion-edge,
                 and 12-billion-edge Tencent Weibo social graphs, and
                 several synthetic ones; CatchSync consistently
                 outperforms existing competitors, both in detection
                 accuracy by 36\% on Twitter and 20\% on Tencent Weibo,
                 as well as in speed.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "35",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wei:2016:HTH,
  author =       "Ying Wei and Yangqiu Song and Yi Zhen and Bo Liu and
                 Qiang Yang",
  title =        "Heterogeneous Translated Hashing: a Scalable Solution
                 Towards Multi-Modal Similarity Search",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "36:1--36:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2744204",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/hash.bib;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Multi-modal similarity search has attracted
                 considerable attention to meet the need of information
                 retrieval across different types of media. To enable
                 efficient multi-modal similarity search in large-scale
                 databases recently, researchers start to study
                 multi-modal hashing. Most of the existing methods are
                 applied to search across multi-views among which
                 explicit correspondence is provided. Given a
                 multi-modal similarity search task, we observe that
                 abundant multi-view data can be found on the Web which
                 can serve as an auxiliary bridge. In this paper, we
                 propose a Heterogeneous Translated Hashing (HTH) method
                 with such auxiliary bridge incorporated not only to
                 improve current multi-view search but also to enable
                 similarity search across heterogeneous media which have
                 no direct correspondence. HTH provides more flexible
                 and discriminative ability by embedding heterogeneous
                 media into different Hamming spaces, compared to almost
                 all existing methods that map heterogeneous data in a
                 common Hamming space. We formulate a joint optimization
                 model to learn hash functions embedding heterogeneous
                 media into different Hamming spaces, and a translator
                 aligning different Hamming spaces. The extensive
                 experiments on two real-world datasets, one publicly
                 available dataset of Flickr, and the other
                 MIRFLICKR-Yahoo Answers dataset, highlight the
                 effectiveness and efficiency of our algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "36",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Tong:2016:GES,
  author =       "Hanghang Tong and Fei Wang and Munmun De Choudhury and
                 Zoran Obradovic",
  title =        "Guest Editorial: Special Issue on Connected Health at
                 Big Data Era {(BigChat)}: a {TKDD} Special Issue",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "37:1--37:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2912122",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "37",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xiong:2016:KIT,
  author =       "Feiyu Xiong and Moshe Kam and Leonid Hrebien and
                 Beilun Wang and Yanjun Qi",
  title =        "Kernelized Information-Theoretic Metric Learning for
                 Cancer Diagnosis Using High-Dimensional Molecular
                 Profiling Data",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "38:1--38:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2789212",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "With the advancement of genome-wide monitoring
                 technologies, molecular expression data have become
                 widely used for diagnosing cancer through tumor or
                 blood samples. When mining molecular signature data,
                 the process of comparing samples through an adaptive
                 distance function is fundamental but difficult, as such
                 datasets are normally heterogeneous and high
                 dimensional. In this article, we present kernelized
                 information-theoretic metric learning (KITML)
                 algorithms that optimize a distance function to tackle
                 the cancer diagnosis problem and scale to high
                 dimensionality. By learning a nonlinear transformation
                 in the input space implicitly through kernelization,
                 KITML permits efficient optimization, low storage, and
                 improved learning of distance metric. We propose two
                 novel applications of KITML for diagnosing cancer using
                 high-dimensional molecular profiling data: (1) for
                 sample-level cancer diagnosis, the learned metric is
                 used to improve the performance of $k$-nearest neighbor
                 classification; and (2) for estimating the severity
                 level or stage of a group of samples, we propose a
                 novel set-based ranking approach to extend KITML. For
                 the sample-level cancer classification task, we have
                 evaluated on 14 cancer gene microarray datasets and
                 compared with eight other state-of-the-art approaches.
                 The results show that our approach achieves the best
                 overall performance for the task of
                 molecular-expression-driven cancer sample diagnosis.
                 For the group-level cancer stage estimation, we test
                 the proposed set-KITML approach using three multi-stage
                 cancer microarray datasets, and correctly estimated the
                 stages of sample groups for all three studies.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2016:JML,
  author =       "Pei Yang and Hongxia Yang and Haoda Fu and Dawei Zhou
                 and Jieping Ye and Theodoros Lappas and Jingrui He",
  title =        "Jointly Modeling Label and Feature Heterogeneity in
                 Medical Informatics",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "39:1--39:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2768831",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Multiple types of heterogeneity including label
                 heterogeneity and feature heterogeneity often co-exist
                 in many real-world data mining applications, such as
                 diabetes treatment classification, gene functionality
                 prediction, and brain image analysis. To effectively
                 leverage such heterogeneity, in this article, we
                 propose a novel graph-based model for Learning with
                 both Label and Feature heterogeneity, namely L$^2$F. It
                 models the label correlation by requiring that any two
                 label-specific classifiers behave similarly on the same
                 views if the associated labels are similar, and imposes
                 the view consistency by requiring that view-based
                 classifiers generate similar predictions on the same
                 examples. The objective function for L$^2$F is jointly
                 convex. To solve the optimization problem, we propose
                 an iterative algorithm, which is guaranteed to converge
                 to the global optimum. One appealing feature of L$^2$F
                 is that it is capable of handling data with missing
                 views and labels. Furthermore, we analyze its
                 generalization performance based on Rademacher
                 complexity, which sheds light on the benefits of
                 jointly modeling the label and feature heterogeneity.
                 Experimental results on various biomedical datasets
                 show the effectiveness of the proposed approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "39",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{WU:2016:MDN,
  author =       "Yubao Wu and Xiaofeng Zhu and Li Li and Wei Fan and
                 Ruoming Jin and Xiang Zhang",
  title =        "Mining Dual Networks: Models, Algorithms, and
                 Applications",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "40:1--40:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2785970",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Finding the densest subgraph in a single graph is a
                 fundamental problem that has been extensively studied.
                 In many emerging applications, there exist dual
                 networks. For example, in genetics, it is important to
                 use protein interactions to interpret genetic
                 interactions. In this application, one network
                 represents physical interactions among nodes, for
                 example, protein--protein interactions, and another
                 network represents conceptual interactions, for
                 example, genetic interactions. Edges in the conceptual
                 network are usually derived based on certain
                 correlation measure or statistical test measuring the
                 strength of the interaction. Two nodes with strong
                 conceptual interaction may not have direct physical
                 interaction. In this article, we propose the novel
                 dual-network model and investigate the problem of
                 finding the densest connected subgraph (DCS), which has
                 the largest density in the conceptual network and is
                 also connected in the physical network. Density in the
                 conceptual network represents the average strength of
                 the measured interacting signals among the set of
                 nodes. Connectivity in the physical network shows how
                 they interact physically. Such pattern cannot be
                 identified using the existing algorithms for a single
                 network. We show that even though finding the densest
                 subgraph in a single network is polynomial time
                 solvable, the DCS problem is NP-hard. We develop a
                 two-step approach to solve the DCS problem. In the
                 first step, we effectively prune the dual networks,
                 while guaranteeing that the optimal solution is contained
                 in the remaining networks. For the second step, we
                 develop two efficient greedy methods based on different
                 search strategies to find the DCS. Different variations
                 of the DCS problem are also studied. We perform
                 extensive experiments on a variety of real and
                 synthetic dual networks to evaluate the effectiveness
                 and efficiency of the developed methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "40",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Cui:2016:BOQ,
  author =       "Licong Cui and Shiqiang Tao and Guo-Qiang Zhang",
  title =        "Biomedical Ontology Quality Assurance Using a Big Data
                 Approach",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "41:1--41:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2768830",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This article presents recent progresses made in using
                 scalable cloud computing environment, Hadoop and
                 MapReduce, to perform ontology quality assurance (OQA),
                 and points to areas of future opportunity. The standard
                 sequential approach used for implementing OQA methods
                 can take weeks if not months for exhaustive analyses
                 for large biomedical ontological systems. With OQA
                 methods newly implemented using massively parallel
                 algorithms in the MapReduce framework, several orders
                 of magnitude in speed-up can be achieved (e.g., from
                 three months to three hours). Such dramatically reduced
                 time makes it feasible not only to perform exhaustive
                 structural analysis of large ontological hierarchies,
                 but also to systematically track structural changes
                 between versions for evolutional analysis. As an
                 exemplar, progress is reported in using MapReduce to
                 perform evolutional analysis and visualization on the
                 Systematized Nomenclature of Medicine--Clinical Terms
                 (SNOMED CT), a prominent clinical terminology system.
                 Future opportunities in three areas are described: one
                 is to extend the scope of MapReduce-based approach to
                 existing OQA methods, especially for automated
                 exhaustive structural analysis. The second is to apply
                 our proposed MapReduce Pipeline for Lattice-based
                 Evaluation (MaPLE) approach, demonstrated as an
                 exemplar method for SNOMED CT, to other biomedical
                 ontologies. The third area is to develop interfaces for
                 reviewing results obtained by OQA methods and for
                 visualizing ontological alignment and evolution, which
                 can also take advantage of cloud computing technology
                 to systematically pre-compute computationally intensive
                 jobs in order to increase performance during user
                 interactions with the visualization interface. Advances
                 in these directions are expected to better support the
                 ontological engineering lifecycle.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "41",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Rayana:2016:LMB,
  author =       "Shebuti Rayana and Leman Akoglu",
  title =        "Less is More: Building Selective Anomaly Ensembles",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "42:1--42:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2890508",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Ensemble learning for anomaly detection has been
                 barely studied, due to difficulty in acquiring ground
                 truth and the lack of inherent objective functions. In
                 contrast, ensemble approaches for classification and
                 clustering have been studied and effectively used for
                 long. Our work taps into this gap and builds a new
                 ensemble approach for anomaly detection, with
                 application to event detection in temporal graphs as
                 well as outlier detection in no-graph settings. It
                 handles and combines multiple heterogeneous detectors
                 to yield improved and robust performance. Importantly,
                 trusting results from all the constituent detectors may
                 deteriorate the overall performance of the ensemble, as
                 some detectors could provide inaccurate results
                 depending on the type of data in hand and the
                 underlying assumptions of a detector. This suggests
                 that combining the detectors selectively is key to
                 building effective anomaly ensembles---hence ``less is
                 more''. In this paper we propose a novel ensemble
                 approach called SELECT for anomaly detection, which
                 automatically and systematically selects the results
                 from constituent detectors to combine in a fully
                 unsupervised fashion. We apply our method to event
                 detection in temporal graphs and outlier detection in
                 multi-dimensional point data (no-graph), where SELECT
                 successfully utilizes five base detectors and seven
                 consensus methods under a unified ensemble framework.
                 We provide extensive quantitative evaluation of our
                 approach for event detection on five real-world
                 datasets (four with ground truth events), including
                 Enron email communications, RealityMining SMS and phone
                 call records, New York Times news corpus, and World Cup
                 2014 Twitter news feed. We also provide results for
                 outlier detection on seven real-world multi-dimensional
                 point datasets from UCI Machine Learning Repository.
                 Thanks to its selection mechanism, SELECT yields
                 superior performance compared to the individual
                 detectors alone, the full ensemble (naively combining
                 all results), an existing diversity-based ensemble, and
                 an existing weighted ensemble approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "42",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhu:2016:CCS,
  author =       "Yada Zhu and Jingrui He",
  title =        "Co-Clustering Structural Temporal Data with
                 Applications to Semiconductor Manufacturing",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "43:1--43:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2875427",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Recent years have witnessed data explosion in
                 semiconductor manufacturing due to advances in
                 instrumentation and storage techniques. The large
                 amount of data associated with process variables
                 monitored over time form a rich reservoir of
                 information, which can be used for a variety of
                 purposes, such as anomaly detection, quality control,
                 and fault diagnostics. In particular, following the
                 same recipe for a certain Integrated Circuit device,
                 multiple tools and chambers can be deployed for the
                 production of this device, during which multiple time
                 series can be collected, such as temperature,
                 impedance, gas flow, electric bias, etc. These time
                 series naturally fit into a two-dimensional array
                 (matrix), i.e., each element in this array corresponds
                 to a time series for one process variable from one
                 chamber. To leverage the rich structural information in
                 such temporal data, in this article, we propose a novel
                 framework named C-Struts to simultaneously cluster on
                 the two dimensions of this array. In this framework, we
                 interpret the structural information as a set of
                 constraints on the cluster membership, introduce an
                 auxiliary probability distribution accordingly, and
                 design an iterative algorithm to assign each time
                 series to a certain cluster on each dimension.
                 Furthermore, we establish the equivalence between
                 C-Struts and a generic optimization problem, which is
                 able to accommodate various distance functions.
                 Extensive experiments on synthetic, benchmark, as well
                 as manufacturing datasets demonstrate the effectiveness
                 of the proposed method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "43",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Tahani:2016:IDD,
  author =       "Maryam Tahani and Ali M. A. Hemmatyar and Hamid R.
                 Rabiee and Maryam Ramezani",
  title =        "Inferring Dynamic Diffusion Networks in Online Media",
  journal =      j-TKDD,
  volume =       "10",
  number =       "4",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2882968",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:29 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Online media play an important role in information
                 societies by providing a convenient infrastructure for
                 different processes. Information diffusion that is a
                 fundamental process taking place on social and
                 information networks has been investigated in many
                 studies. Research on information diffusion in these
                 networks faces two main challenges: (1) In most cases,
                 diffusion takes place on an underlying network, which
                 is latent and its structure is unknown. (2) This latent
                 network is not fixed and changes over time. In this
                 article, we investigate the diffusion network
                 extraction (DNE) problem when the underlying network is
                 dynamic and latent. We model the diffusion behavior
                 (existence probability) of each edge as a stochastic
                 process and utilize the Hidden Markov Model (HMM) to
                 discover the most probable diffusion links according to
                 the current observation of the diffusion process, which
                 is the infection time of nodes and the past diffusion
                 behavior of links. We evaluate the performance of our
                 Dynamic Diffusion Network Extraction (DDNE) method, on
                 both synthetic and real datasets. Experimental results
                 show that the performance of the proposed method is
                 independent of the cascade transmission model and
                 outperforms the state of art method in terms of
                 F-measure.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Koh:2016:URP,
    author          = {Yun Sing Koh and Sri Devi Ravana},
    title           = {Unsupervised Rare Pattern Mining: a Survey},
    journal         = j-TKDD,
    volume          = {10},
    number          = {4},
    pages           = {45:1--45:??},
    month           = jul,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2898359},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:29 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Association rule mining was first introduced to
                       examine patterns among frequent items. The original
                       motivation for seeking these rules arose from need to
                       examine customer purchasing behaviour in supermarket
                       transaction data. It seeks to identify combinations of
                       items or itemsets, whose presence in a transaction
                       affects the likelihood of the presence of another
                       specific item or itemsets. In recent years, there has
                       been an increasing demand for rare association rule
                       mining. Detecting rare patterns in data is a vital
                       task, with numerous high-impact applications including
                       medical, finance, and security. This survey aims to
                       provide a general, comprehensive, and structured
                       overview of the state-of-the-art methods for rare
                       pattern mining. We investigate the problems in finding
                       rare rules using traditional association rule mining.
                       As rare association rule mining has not been well
                       explored, there is still specific groundwork that needs
                       to be established. We will discuss some of the major
                       issues in rare association rule mining and also look at
                       current algorithms. As a contribution, we give a
                       general framework for categorizing algorithms: Apriori
                       and Tree based. We highlight the differences between
                       these methods. Finally, we present several real-world
                       application using rare pattern mining in diverse
                       domains. We conclude our survey with a discussion on
                       open and practical challenges in the field.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {45},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Cheng:2016:CFR,
    author          = {Wei Cheng and Zhishan Guo and Xiang Zhang and Wei
                       Wang},
    title           = {{CGC}: a Flexible and Robust Approach to Integrating
                       Co-Regularized Multi-Domain Graph for Clustering},
    journal         = j-TKDD,
    volume          = {10},
    number          = {4},
    pages           = {46:1--46:??},
    month           = jul,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2903147},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:29 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Multi-view graph clustering aims to enhance clustering
                       performance by integrating heterogeneous information
                       collected in different domains. Each domain provides a
                       different view of the data instances. Leveraging
                       cross-domain information has been demonstrated an
                       effective way to achieve better clustering results.
                       Despite the previous success, existing multi-view graph
                       clustering methods usually assume that different views
                       are available for the same set of instances. Thus,
                       instances in different domains can be treated as having
                       strict one-to-one relationship. In many real-life
                       applications, however, data instances in one domain may
                       correspond to multiple instances in another domain.
                       Moreover, relationships between instances in different
                       domains may be associated with weights based on prior
                       (partial) knowledge. In this article, we propose a
                       flexible and robust framework, Co-regularized Graph
                       Clustering (CGC), based on non-negative matrix
                       factorization (NMF), to tackle these challenges. CGC
                       has several advantages over the existing methods.
                       First, it supports many-to-many cross-domain instance
                       relationship. Second, it incorporates weight on
                       cross-domain relationship. Third, it allows partial
                       cross-domain mapping so that graphs in different
                       domains may have different sizes. Finally, it provides
                       users with the extent to which the cross-domain
                       instance relationship violates the in-domain clustering
                       structure, and thus enables users to re-evaluate the
                       consistency of the relationship. We develop an
                       efficient optimization method that guarantees to find
                       the global optimal solution with a given confidence
                       requirement. The proposed method can automatically
                       identify noisy domains and assign smaller weights to
                       them. This helps to obtain optimal graph partition for
                       the focused domain. Extensive experimental results on
                       UCI benchmark datasets, newsgroup datasets, and
                       biological interaction networks demonstrate the
                       effectiveness of our approach.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {46},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Shen:2016:SPO,
    author          = {Chih-Ya Shen and De-Nian Yang and Wang-Chien Lee and
                       Ming-Syan Chen},
    title           = {Spatial-Proximity Optimization for Rapid Task Group
                       Deployment},
    journal         = j-TKDD,
    volume          = {10},
    number          = {4},
    pages           = {47:1--47:??},
    month           = jul,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2818714},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:29 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Spatial proximity is one of the most important factors
                       for the quick deployment of the task groups in various
                       time-sensitive missions. This article proposes a new
                       spatial query, Spatio-Social Team Query (SSTQ), that
                       forms a strong task group by considering (1) the
                       group's spatial distance (i.e., transportation time),
                       (2) skills of the candidate group members, and (3)
                       social rapport among the candidates. Efficient
                       processing of SSTQ is very challenging, because the
                       aforementioned spatial, skill, and social factors need
                       to be carefully examined. In this article, therefore,
                       we first formulate two subproblems of SSTQ, namely
                       Hop-Constrained Team Problem (HCTP) and
                       Connection-Oriented Team Query (COTQ). HCTP is a
                       decision problem that considers only social and skill
                       dimensions. We prove that HCTP is NP-Complete.
                       Moreover, based on the hardness of HCTP, we prove that
                       SSTQ is NP-Hard and inapproximable within any factor.
                       On the other hand, COTQ is a special case of SSTQ that
                       relaxes the social constraint. We prove that COTQ is
                       NP-Hard and propose an approximation algorithm for
                       COTQ, namely COTprox. Furthermore, based on the
                       observations on COTprox, we devise an approximation
                       algorithm, SSTprox, with a guaranteed error bound for
                       SSTQ. Finally, to efficiently obtain the optimal
                       solution to SSTQ for small instances, we design two
                       efficient algorithms, SpatialFirst and SkillFirst, with
                       different scenarios in mind. These two algorithms
                       incorporate various effective ordering and pruning
                       techniques to reduce the search space for answering
                       SSTQ. Experimental results on real datasets indicate
                       that the proposed algorithms can efficiently answer
                       SSTQ under various parameter settings.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {47},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Yu:2016:FDV,
    author          = {Zhiwen Yu and Zhitao Wang and Liming Chen and Bin Guo
                       and Wenjie Li},
    title           = {Featuring, Detecting, and Visualizing Human Sentiment
                       in {Chinese} Micro-Blog},
    journal         = j-TKDD,
    volume          = {10},
    number          = {4},
    pages           = {48:1--48:??},
    month           = jul,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2821513},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:29 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Micro-blog has been increasingly used for the public
                       to express their opinions, and for organizations to
                       detect public sentiment about social events or public
                       policies. In this article, we examine and identify the
                       key problems of this field, focusing particularly on
                       the characteristics of innovative words, multi-media
                       elements, and hierarchical structure of Chinese
                       ``Weibo.'' Based on the analysis, we propose a novel
                       approach and develop associated theoretical and
                       technological methods to address these problems. These
                       include a new sentiment word mining method based on
                       three wording metrics and point-wise information, a
                       rule set model for analyzing sentiment features of
                       different linguistic components, and the corresponding
                       methodology for calculating sentiment on
                       multi-granularity considering emoticon elements as
                       auxiliary affective factors. We evaluate our new word
                       discovery and sentiment detection methods on a
                       real-life Chinese micro-blog dataset. Initial results
                       show that our new diction can improve sentiment
                       detection, and they demonstrate that our multi-level
                       rule set method is more effective, with the average
                       accuracy being 10.2\% and 1.5\% higher than two
                       existing methods for Chinese micro-blog sentiment
                       analysis. In addition, we exploit visualization
                       techniques to study the relationships between online
                       sentiment and real life. The visualization of detected
                       sentiment can help depict temporal patterns and spatial
                       discrepancy.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {48},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Chen:2016:EOL,
    author          = {Chen Chen and Hanghang Tong and B. Aditya Prakash and
                       Tina Eliassi-Rad and Michalis Faloutsos and Christos
                       Faloutsos},
    title           = {Eigen-Optimization on Large Graphs by Edge
                       Manipulation},
    journal         = j-TKDD,
    volume          = {10},
    number          = {4},
    pages           = {49:1--49:??},
    month           = jul,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2903148},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:29 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Large graphs are prevalent in many applications and
                       enable a variety of information dissemination
                       processes, e.g., meme, virus, and influence
                       propagation. How can we optimize the underlying graph
                       structure to affect the outcome of such dissemination
                       processes in a desired way (e.g., stop a virus
                       propagation, facilitate the propagation of a piece of
                       good idea, etc)? Existing research suggests that the
                       leading eigenvalue of the underlying graph is the key
                       metric in determining the so-called epidemic threshold
                       for a variety of dissemination models. In this paper,
                       we study the problem of how to optimally place a set of
                       edges (e.g., edge deletion and edge addition) to
                       optimize the leading eigenvalue of the underlying
                       graph, so that we can guide the dissemination process
                       in a desired way. We propose effective, scalable
                       algorithms for edge deletion and edge addition,
                       respectively. In addition, we reveal the intrinsic
                       relationship between edge deletion and node deletion
                       problems. Experimental results validate the
                       effectiveness and efficiency of the proposed
                       algorithms.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {49},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Yu:2016:STR,
    author          = {Zhiwen Yu and Miao Tian and Zhu Wang and Bin Guo and
                       Tao Mei},
    title           = {Shop-Type Recommendation Leveraging the Data from
                       Social Media and Location-Based Services},
    journal         = j-TKDD,
    volume          = {11},
    number          = {1},
    pages           = {1:1--1:??},
    month           = aug,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2930671},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:30 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {It is an important yet challenging task for investors
                       to determine the most suitable type of shop (e.g.,
                       restaurant, fashion) for a newly opened store.
                       Traditional ways are predominantly field surveys and
                       empirical estimation, which are not effective as they
                       lack shop-related data. As social media and
                       location-based services (LBS) are becoming more and
                       more pervasive, user-generated data from these
                       platforms are providing rich information not only about
                       individual consumption experiences, but also about shop
                       attributes. In this paper, we investigate the
                       recommendation of shop types for a given location, by
                       leveraging heterogeneous data that are mainly
                       historical user preferences and location context from
                       social media and LBS. Our goal is to select the most
                       suitable shop type, seeking to maximize the number of
                       customers served from a candidate set of types. We
                       propose a novel bias learning matrix factorization
                       method with feature fusion for shop popularity
                       prediction. Features are defined and extracted from two
                       perspectives: location, where features are closely
                       related to location characteristics, and commercial,
                       where features are about the relationships between
                       shops in the neighborhood. Experimental results show
                       that the proposed method outperforms state-of-the-art
                       solutions.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {1},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{McDowell:2016:LNA,
    author          = {Luke K. McDowell and David W. Aha},
    title           = {Leveraging Neighbor Attributes for Classification in
                       Sparsely Labeled Networks},
    journal         = j-TKDD,
    volume          = {11},
    number          = {1},
    pages           = {2:1--2:??},
    month           = aug,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2898358},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:30 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Many analysis tasks involve linked nodes, such as
                       people connected by friendship links. Research on
                       link-based classification (LBC) has studied how to
                       leverage these connections to improve classification
                       accuracy. Most such prior research has assumed the
                       provision of a densely labeled training network.
                       Instead, this article studies the common and
                       challenging case when LBC must use a single sparsely
                       labeled network for both learning and inference, a case
                       where existing methods often yield poor accuracy. To
                       address this challenge, we introduce a novel method
                       that enables prediction via ``neighbor attributes,''
                       which were briefly considered by early LBC work but
                       then abandoned due to perceived problems. We then
                       explain, using both extensive experiments and loss
                       decomposition analysis, how using neighbor attributes
                       often significantly improves accuracy. We further show
                       that using appropriate semi-supervised learning (SSL)
                       is essential to obtaining the best accuracy in this
                       domain and that the gains of neighbor attributes remain
                       across a range of SSL choices and data conditions.
                       Finally, given the challenges of label sparsity for LBC
                       and the impact of neighbor attributes, we show that
                       multiple previous studies must be re-considered,
                       including studies regarding the best model features,
                       the impact of noisy attributes, and strategies for
                       active learning.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {2},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Chang:2016:CSP,
    author          = {Xiaojun Chang and Feiping Nie and Yi Yang and Chengqi
                       Zhang and Heng Huang},
    title           = {Convex Sparse {PCA} for Unsupervised Feature
                       Learning},
    journal         = j-TKDD,
    volume          = {11},
    number          = {1},
    pages           = {3:1--3:??},
    month           = aug,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2910585},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:30 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Principal component analysis (PCA) has been widely
                       applied to dimensionality reduction and data
                       pre-processing for different applications in
                       engineering, biology, social science, and the like.
                       Classical PCA and its variants seek for linear
                       projections of the original variables to obtain the
                       low-dimensional feature representations with maximal
                       variance. One limitation is that it is difficult to
                       interpret the results of PCA. Besides, the classical
                       PCA is vulnerable to certain noisy data. In this paper,
                       we propose a Convex Sparse Principal Component Analysis
                       (CSPCA) algorithm and apply it to feature learning.
                       First, we show that PCA can be formulated as a low-rank
                       regression optimization problem. Based on the
                       discussion, the $ l_{2, 1}$-norm minimization is
                       incorporated into the objective function to make the
                       regression coefficients sparse, thereby robust to the
                       outliers. Also, based on the sparse model used in
                       CSPCA, an optimal weight is assigned to each of the
                       original feature, which in turn provides the output
                       with good interpretability. With the output of our
                       CSPCA, we can effectively analyze the importance of
                       each feature under the PCA criteria. Our new objective
                       function is convex, and we propose an iterative
                       algorithm to optimize it. We apply the CSPCA algorithm
                       to feature selection and conduct extensive experiments
                       on seven benchmark datasets. Experimental results
                       demonstrate that the proposed algorithm outperforms
                       state-of-the-art unsupervised feature selection
                       algorithms.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {3},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wu:2016:LLR,
    author          = {Ou Wu and Qiang You and Fen Xia and Lei Ma and Weiming
                       Hu},
    title           = {Listwise Learning to Rank from Crowds},
    journal         = j-TKDD,
    volume          = {11},
    number          = {1},
    pages           = {4:1--4:??},
    month           = aug,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2910586},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:30 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Learning to rank has received great attention in
                       recent years as it plays a crucial role in many
                       applications such as information retrieval and data
                       mining. The existing concept of learning to rank
                       assumes that each training instance is associated with
                       a reliable label. However, in practice, this assumption
                       does not necessarily hold true as it may be infeasible
                       or remarkably expensive to obtain reliable labels for
                       many learning to rank applications. Therefore, a
                       feasible approach is to collect labels from crowds and
                       then learn a ranking function from crowdsourcing
                       labels. This study explores the listwise learning to
                       rank with crowdsourcing labels obtained from multiple
                       annotators, who may be unreliable. A new probabilistic
                       ranking model is first proposed by combining two
                       existing models. Subsequently, a ranking function is
                       trained by proposing a maximum likelihood learning
                       approach, which estimates ground-truth labels and
                       annotator expertise, and trains the ranking function
                       iteratively. In practical crowdsourcing machine
                       learning, valuable side information (e.g., professional
                       grades) about involved annotators is normally
                       attainable. Therefore, this study also investigates
                       learning to rank from crowd labels when side
                       information on the expertise of involved annotators is
                       available. In particular, three basic types of side
                       information are investigated, and corresponding
                       learning algorithms are consequently introduced.
                       Further, the top-k learning to rank from crowdsourcing
                       labels are explored to deal with long training ranking
                       lists. The proposed algorithms are tested on both
                       synthetic and real-world data. Results reveal that the
                       maximum likelihood estimation approach significantly
                       outperforms the average approach and existing
                       crowdsourcing regression methods. The performances of
                       the proposed algorithms are comparable to those of the
                       learning model in consideration reliable labels. The
                       results of the investigation further indicate that side
                       information is helpful in inferring both ranking
                       functions and expertise degrees of annotators.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {4},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Shao:2016:SCI,
    author          = {Junming Shao and Qinli Yang and Hoang-Vu Dang and
                       Bertil Schmidt and Stefan Kramer},
    title           = {Scalable Clustering by Iterative Partitioning and
                       Point Attractor Representation},
    journal         = j-TKDD,
    volume          = {11},
    number          = {1},
    pages           = {5:1--5:??},
    month           = aug,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2934688},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:30 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Clustering very large datasets while preserving
                       cluster quality remains a challenging data-mining task
                       to date. In this paper, we propose an effective
                       scalable clustering algorithm for large datasets that
                       builds upon the concept of synchronization. Inherited
                       from the powerful concept of synchronization, the
                       proposed algorithm, CIPA (Clustering by Iterative
                       Partitioning and Point Attractor Representations), is
                       capable of handling very large datasets by iteratively
                       partitioning them into thousands of subsets and
                       clustering each subset separately. Using dynamic
                       clustering by synchronization, each subset is then
                       represented by a set of point attractors and outliers.
                       Finally, CIPA identifies the cluster structure of the
                       original dataset by clustering the newly generated
                       dataset consisting of points attractors and outliers
                       from all subsets. We demonstrate that our new scalable
                       clustering approach has several attractive benefits:
                       (a) CIPA faithfully captures the cluster structure of
                       the original data by performing clustering on each
                       separate data iteratively instead of using any sampling
                       or statistical summarization technique. (b) It allows
                       clustering very large datasets efficiently with high
                       cluster quality. (c) CIPA is parallelizable and also
                       suitable for distributed data. Extensive experiments
                       demonstrate the effectiveness and efficiency of our
                       approach.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {5},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Grabocka:2016:LTS,
    author          = {Josif Grabocka and Nicolas Schilling and Lars
                       Schmidt-Thieme},
    title           = {Latent Time-Series Motifs},
    journal         = j-TKDD,
    volume          = {11},
    number          = {1},
    pages           = {6:1--6:??},
    month           = aug,
    year            = {2016},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2940329},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Aug 29 07:28:30 MDT 2016},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Motifs are the most repetitive/frequent patterns of a
                       time-series. The discovery of motifs is crucial for
                       practitioners in order to understand and interpret the
                       phenomena occurring in sequential data. Currently,
                       motifs are searched among series sub-sequences, aiming
                       at selecting the most frequently occurring ones.
                       Search-based methods, which try out series sub-sequence
                       as motif candidates, are currently believed to be the
                       best methods in finding the most frequent patterns.
                       However, this paper proposes an entirely new
                       perspective in finding motifs. We demonstrate that
                       searching is non-optimal since the domain of motifs is
                       restricted, and instead we propose a principled
                       optimization approach able to find optimal motifs. We
                       treat the occurrence frequency as a function and
                       time-series motifs as its parameters, therefore we
                       learn the optimal motifs that maximize the frequency
                       function. In contrast to searching, our method is able
                       to discover the most repetitive patterns (hence
                       optimal), even in cases where they do not explicitly
                       occur as sub-sequences. Experiments on several
                       real-life time-series datasets show that the motifs
                       found by our method are highly more frequent than the
                       ones found through searching, for exactly the same
                       distance threshold.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {6},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD 11(1), article 7 (August 2016).
@article{Zhang:2016:SNE,
  author = {Xianchao Zhang and Linlin Zong and Quanzeng You and
    Xing Yong},
  title = {Sampling for {Nystr{\"o}m} Extension-Based Spectral
    Clustering: Incremental Perspective and Novel
    Analysis},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {11},
  number = {1},
  articleno = {7},
  pages = {7:1--7:??},
  month = aug,
  year = {2016},
  doi = {https://doi.org/10.1145/2934693},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Sampling is the key aspect for Nystr{\"o}m extension
    based spectral clustering. Traditional sampling schemes
    select the set of landmark points on a whole and focus
    on how to lower the matrix approximation error.
    However, the matrix approximation error does not have
    direct impact on the clustering performance. In this
    article, we propose a sampling framework from an
    incremental perspective, i.e., the landmark points are
    selected one by one, and each next point to be sampled
    is determined by previously selected landmark points.
    Incremental sampling builds explicit relationships
    among landmark points; thus, they work together well
    and provide a theoretical guarantee on the clustering
    performance. We provide two novel analysis methods and
    propose two schemes for selecting-the-next-one of the
    framework. The first scheme is based on clusterability
    analysis, which provides a better guarantee on
    clustering performance than schemes based on matrix
    approximation error analysis. The second scheme is
    based on loss analysis, which provides maximized
    predictive ability of the landmark points on the
    (implicit) labels of the unsampled points. Experimental
    results on a wide range of benchmark datasets
    demonstrate the superiorities of our proposed
    incremental sampling schemes over existing sampling
    schemes.},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Aug 29 07:28:30 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

%%% TKDD 11(1), article 8 (August 2016).
@article{Qiao:2016:FST,
  author = {Maoying Qiao and Richard Yi Da Xu and Wei Bian and
    Dacheng Tao},
  title = {Fast Sampling for Time-Varying Determinantal Point
    Processes},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {11},
  number = {1},
  articleno = {8},
  pages = {8:1--8:??},
  month = aug,
  year = {2016},
  doi = {https://doi.org/10.1145/2943785},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Determinantal Point Processes (DPPs) are stochastic
    models which assign each subset of a base dataset with
    a probability proportional to the subset's degree of
    diversity. It has been shown that DPPs are particularly
    appropriate in data subset selection and summarization
    (e.g., news display, video summarizations). DPPs prefer
    diverse subsets while other conventional models cannot
    offer. However, DPPs inference algorithms have a
    polynomial time complexity which makes it difficult to
    handle large and time-varying datasets, especially when
    real-time processing is required. To address this
    limitation, we developed a fast sampling algorithm for
    DPPs which takes advantage of the nature of some
    time-varying data (e.g., news corpora updating,
    communication network evolving), where the data changes
    between time stamps are relatively small. The proposed
    algorithm is built upon the simplification of marginal
    density functions over successive time stamps and the
    sequential Monte Carlo (SMC) sampling technique.
    Evaluations on both a real-world news dataset and the
    Enron Corpus confirm the efficiency of the proposed
    algorithm.},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Aug 29 07:28:30 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

%%% TKDD 11(1), article 9 (August 2016).  The second author's surname
%%% is spelled "D'Angelo" (capital A); this entry previously had
%%% "D'angelo".
@Article{Crescenzi:2016:GIO,
  author =       "Pierluigi Crescenzi and Gianlorenzo D'Angelo and
                 Lorenzo Severini and Yllka Velaj",
  title =        "Greedily Improving Our Own Closeness Centrality in a
                 Network",
  journal =      j-TKDD,
  volume =       "11",
  number =       "1",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2953882",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Aug 29 07:28:30 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The closeness centrality is a well-known measure of
                 importance of a vertex within a given complex network.
                 Having high closeness centrality can have positive
                 impact on the vertex itself: hence, in this paper we
                 consider the optimization problem of determining how
                 much a vertex can increase its centrality by creating a
                 limited amount of new edges incident to it. We will
                 consider both the undirected and the directed graph
                 cases. In both cases, we first prove that the
                 optimization problem does not admit a polynomial-time
                 approximation scheme (unless P = NP), and then propose
                 a greedy approximation algorithm (with an almost tight
                 approximation ratio), whose performance is then tested
                 on synthetic graphs and real-world networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD 11(1), article 10 (August 2016).
@article{Li:2016:CBN,
  author = {Xiang Li and Charles X. Ling and Huaimin Wang},
  title = {The Convergence Behavior of Naive {Bayes} on Large
    Sparse Datasets},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {11},
  number = {1},
  articleno = {10},
  pages = {10:1--10:??},
  month = aug,
  year = {2016},
  doi = {https://doi.org/10.1145/2948068},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Large and sparse datasets with a lot of missing values
    are common in the big data era, such as user behaviors
    over a large number of items. Classification in such
    datasets is an important topic for machine learning and
    data mining. Practically, naive Bayes is still a
    popular classification algorithm for large sparse
    datasets, as its time and space complexity scales
    linearly with the size of non-missing values. However,
    several important questions about the behavior of naive
    Bayes are yet to be answered. For example, how
    different mechanisms of data missing, data sparsity,
    and the number of attributes systematically affect the
    learning curves and convergence? In this paper, we
    address several common data missing mechanisms and
    propose novel data generation methods based on these
    mechanisms. We generate large and sparse data
    systematically, and study the entire AUC (Area Under
    ROC Curve) learning curve and convergence behavior of
    naive Bayes. We not only have several important
    experiment observations, but also provide detailed
    theoretic studies. Finally, we summarize our empirical
    and theoretic results as an intuitive decision
    flowchart and a useful guideline for classifying large
    sparse datasets in practice.},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Aug 29 07:28:30 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

%%% TKDD 11(1), article 11 (August 2016).
@article{Fu:2016:MGD,
  author = {Yanjie Fu and Hui Xiong and Yong Ge and Yu Zheng and
    Zijun Yao and Zhi-Hua Zhou},
  title = {Modeling of Geographic Dependencies for Real Estate
    Ranking},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {11},
  number = {1},
  articleno = {11},
  pages = {11:1--11:??},
  month = aug,
  year = {2016},
  doi = {https://doi.org/10.1145/2934692},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {It is traditionally a challenge for home buyers to
    understand, compare, and contrast the investment value
    of real estate. Although a number of appraisal methods
    have been developed to value real properties, the
    performances of these methods have been limited by
    traditional data sources for real estate appraisal.
    With the development of new ways of collecting
    estate-related mobile data, there is a potential to
    leverage geographic dependencies of real estate for
    enhancing real estate appraisal. Indeed, the geographic
    dependencies of the investment value of an estate can
    be from the characteristics of its own neighborhood
    (individual), the values of its nearby estates (peer),
    and the prosperity of the affiliated latent business
    area (zone). To this end, in this paper, we propose a
    geographic method, named ClusRanking, for real estate
    appraisal by leveraging the mutual enforcement of
    ranking and clustering power. ClusRanking is able to
    exploit geographic individual, peer, and zone
    dependencies in a probabilistic ranking model.
    Specifically, we first extract the geographic utility
    of estates from geography data, estimate the
    neighborhood popularity of estates by mining taxicab
    trajectory data, and model the influence of latent
    business areas. Also, we fuse these three influential
    factors and predict real estate investment value.
    Moreover, we simultaneously consider individual, peer
    and zone dependencies, and derive an estate-specific
    ranking likelihood as the objective function.
    Furthermore, we propose an improved method named
    CR-ClusRanking by incorporating checkin information as
    a regularization term which reduces the performance
    volatility of real estate ranking system. Finally, we
    conduct a comprehensive evaluation with the real
    estate-related data of Beijing, and the experimental
    results demonstrate the effectiveness of our proposed
    methods.},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Aug 29 07:28:30 MDT 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

%%% TKDD 11(2), article 12 (December 2016).  In the abstract, the
%%% parenthetical dashes are typeset as TeX em-dashes (---) and the
%%% two clauses about the empirical covariance matrix are separated
%%% by a semicolon.
@Article{Gao:2016:DAC,
  author =       "Zekai J. Gao and Chris Jermaine",
  title =        "Distributed Algorithms for Computing Very Large
                 Thresholded Covariance Matrices",
  journal =      j-TKDD,
  volume =       "11",
  number =       "2",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2935750",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Dec 26 17:17:00 MST 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Computation of covariance matrices from observed data
                 is an important problem, as such matrices are used in
                 applications such as principal component analysis
                 (PCA), linear discriminant analysis (LDA), and
                 increasingly in the learning and application of
                 probabilistic graphical models. However, computing an
                 empirical covariance matrix is not always an easy
                 problem. There are two key difficulties associated with
                 computing such a matrix from a very high-dimensional
                 dataset. The first problem is over-fitting. For a
                 $p$-dimensional covariance matrix, there are $ p(p - 1)
                 / 2$ unique, off-diagonal entries in the empirical
                 covariance matrix $S$; for large $p$ (say, $ p > 10^5$),
                 the size $n$ of the dataset is often much smaller than
                 the number of covariances to compute. Over-fitting is a
                 concern in any situation in which the number of
                 parameters learned can greatly exceed the size of the
                 dataset. Thus, there are strong theoretical reasons to
                 expect that for high-dimensional data---even Gaussian
                 data---the empirical covariance matrix is not a good
                 estimate for the true covariance matrix underlying the
                 generative process. The second problem is
                 computational. Computing a covariance matrix takes $
                 O(n p^2)$ time. For large $p$ (greater than 10,000) and
                 $n$ much greater than $p$, this is debilitating. In
                 this article, we consider how both of these
                 difficulties can be handled simultaneously.
                 Specifically, a key regularization technique for
                 high-dimensional covariance estimation is thresholding,
                 in which the smallest or least significant entries in
                 the covariance matrix are simply dropped and replaced
                 with the value $0$. This suggests an obvious way to
                 address the computational difficulty as well: First,
                 compute the identities of the $K$ entries in the
                 covariance matrix that are actually important in the
                 sense that they will not be removed during
                 thresholding, and then in a second step, compute the
                 values of those entries. This can be done in $ O(K n)$
                 time. If $ K \ll p^2$ and the identities of the
                 important entries can be computed in reasonable time,
                 then this is a big win. The key technical contribution
                 of this article is the design and implementation of two
                 different distributed algorithms for approximating the
                 identities of the important entries quickly, using
                 sampling. We have implemented these methods and tested
                 them using an 800-core compute cluster. Experiments
                 have been run using real datasets having millions of
                 data points and up to 40,000 dimensions. These
                 experiments show that the proposed methods are both
                 accurate and efficient.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD 11(2), article 13 (December 2016).
@article{Wang:2016:WKI,
  author = {Chenguang Wang and Yangqiu Song and Dan Roth and Ming
    Zhang and Jiawei Han},
  title = {World Knowledge as Indirect Supervision for Document
    Clustering},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {11},
  number = {2},
  articleno = {13},
  pages = {13:1--13:??},
  month = dec,
  year = {2016},
  doi = {https://doi.org/10.1145/2953881},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {One of the key obstacles in making learning protocols
    realistic in applications is the need to supervise
    them, a costly process that often requires hiring
    domain experts. We consider the framework to use the
    world knowledge as indirect supervision. World
    knowledge is general-purpose knowledge, which is not
    designed for any specific domain. Then, the key
    challenges are how to adapt the world knowledge to
    domains and how to represent it for learning. In this
    article, we provide an example of using world knowledge
    for domain-dependent document clustering. We provide
    three ways to specify the world knowledge to domains by
    resolving the ambiguity of the entities and their
    types, and represent the data with world knowledge as a
    heterogeneous information network. Then, we propose a
    clustering algorithm that can cluster multiple types
    and incorporate the sub-type information as
    constraints. In the experiments, we use two existing
    knowledge bases as our sources of world knowledge. One
    is Freebase, which is collaboratively collected
    knowledge about entities and their organizations. The
    other is YAGO2, a knowledge base automatically
    extracted from Wikipedia and maps knowledge to the
    linguistic knowledge base, WordNet. Experimental
    results on two text benchmark datasets (20newsgroups
    and RCV1) show that incorporating world knowledge as
    indirect supervision can significantly outperform the
    state-of-the-art clustering algorithms as well as
    clustering algorithms enhanced with world knowledge
    features. A preliminary version of this work appeared
    in the proceedings of KDD 2015 [Wang et al. 2015a].
    This journal version has made several major
    improvements. First, we have proposed a new and general
    learning framework for machine learning with world
    knowledge as indirect supervision, where document
    clustering is a special case in the original paper.
    Second, in order to make our unsupervised semantic
    parsing method more understandable, we add several real
    cases from the original sentences to the resulting
    logic forms with all the necessary information. Third,
    we add details of the three semantic filtering methods
    and conduct deep analysis of the three semantic
    filters, by using case studies to show why the
    conceptualization-based semantic filter can produce
    more accurate indirect supervision. Finally, in
    addition to the experiment on 20 newsgroup data and
    Freebase, we have extended the experiments on
    clustering results by using all the combinations of
    text (20 newsgroup, MCAT, CCAT, ECAT) and world
    knowledge sources (Freebase, YAGO2).},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

%%% TKDD 11(2), article 14 (December 2016).
@article{Chakraborty:2016:PCS,
  author = {Tanmoy Chakraborty and Sriram Srinivasan and Niloy
    Ganguly and Animesh Mukherjee and Sanjukta Bhowmick},
  title = {Permanence and Community Structure in Complex
    Networks},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {11},
  number = {2},
  articleno = {14},
  pages = {14:1--14:??},
  month = dec,
  year = {2016},
  doi = {https://doi.org/10.1145/2953883},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {The goal of community detection algorithms is to
    identify densely connected units within large networks.
    An implicit assumption is that all the constituent
    nodes belong equally to their associated community.
    However, some nodes are more important in the community
    than others. To date, efforts have been primarily made
    to identify communities as a whole, rather than
    understanding to what extent an individual node belongs
    to its community. Therefore, most metrics for
    evaluating communities, for example modularity, are
    global. These metrics produce a score for each
    community, not for each individual node. In this
    article, we argue that the belongingness of nodes in a
    community is not uniform. We quantify the degree of
    belongingness of a vertex within a community by a new
    vertex-based metric called permanence. The central idea
    of permanence is based on the observation that the
    strength of membership of a vertex to a community
    depends upon two factors (i) the extent of connections
    of the vertex within its community versus outside its
    community, and (ii) how tightly the vertex is connected
    internally. We present the formulation of permanence
    based on these two quantities. We demonstrate that
    compared to other existing metrics (such as modularity,
    conductance, and cut-ratio), the change in permanence
    is more commensurate to the level of perturbation in
    ground-truth communities. We discuss how permanence can
    help us understand and utilize the structure and
    evolution of communities by demonstrating that it can
    be used to --- (i) measure the persistence of a vertex
    in a community, (ii) design strategies to strengthen
    the community structure, (iii) explore the
    core-periphery structure within a community, and (iv)
    select suitable initiators for message spreading. We
    further show that permanence is an excellent metric for
    identifying communities. We demonstrate that the
    process of maximizing permanence (abbreviated as
    MaxPerm) produces meaningful communities that concur
    with the ground-truth community structure of the
    networks more accurately than eight other popular
    community detection algorithms. Finally, we provide
    mathematical proofs to demonstrate the correctness of
    finding communities by maximizing permanence. In
    particular, we show that the communities obtained by
    this method are (i) less affected by the changes in
    vertex ordering, and (ii) more resilient to resolution
    limit, degeneracy of solutions, and asymptotic growth
    of values.},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

%%% TKDD 11(2), article 15 (December 2016).
@article{Smith:2016:PNN,
  author = {Laura M. Smith and Linhong Zhu and Kristina Lerman and
    Allon G. Percus},
  title = {Partitioning Networks with Node Attributes by
    Compressing Information Flow},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {11},
  number = {2},
  articleno = {15},
  pages = {15:1--15:??},
  month = dec,
  year = {2016},
  doi = {https://doi.org/10.1145/2968451},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Real-world networks are often organized as modules or
    communities of similar nodes that serve as functional
    units. These networks are also rich in content, with
    nodes having distinguished features or attributes. In
    order to discover a network's modular structure, it is
    necessary to take into account not only its links but
    also node attributes. We describe an
    information-theoretic method that identifies modules by
    compressing descriptions of information flow on a
    network. Our formulation introduces node content into
    the description of information flow, which we then
    minimize to discover groups of nodes with similar
    attributes that also tend to trap the flow of
    information. The method is conceptually simple and does
    not require ad-hoc parameters to specify the number of
    modules or to control the relative contribution of
    links and node attributes to network structure. We
    apply the proposed method to partition real-world
    networks with known community structure. We demonstrate
    that adding node attributes helps recover the
    underlying community structure in content-rich networks
    more effectively than using links alone. In addition,
    we show that our method is faster and more accurate
    than alternative state-of-the-art algorithms.},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

%%% TKDD 11(2), article 16 (December 2016).
@article{Yu:2016:SAO,
  author = {Kui Yu and Xindong Wu and Wei Ding and Jian Pei},
  title = {Scalable and Accurate Online Feature Selection for Big
    Data},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {11},
  number = {2},
  articleno = {16},
  pages = {16:1--16:??},
  month = dec,
  year = {2016},
  doi = {https://doi.org/10.1145/2976744},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Feature selection is important in many big data
    applications. Two critical challenges closely associate
    with big data. First, in many big data applications,
    the dimensionality is extremely high, in millions, and
    keeps growing. Second, big data applications call for
    highly scalable feature selection algorithms in an
    online manner such that each feature can be processed
    in a sequential scan. We present SAOLA, a {Scalable and
    Accurate On Line Approach} for feature selection in
    this paper. With a theoretical analysis on bounds of
    the pairwise correlations between features, SAOLA
    employs novel pairwise comparison techniques and
    maintains a parsimonious model over time in an online
    manner. Furthermore, to deal with upcoming features
    that arrive by groups, we extend the SAOLA algorithm,
    and then propose a new group-SAOLA algorithm for online
    group feature selection. The group-SAOLA algorithm can
    online maintain a set of feature groups that is sparse
    at the levels of both groups and individual features
    simultaneously. An empirical study using a series of
    benchmark real datasets shows that our two algorithms,
    SAOLA and group-SAOLA, are scalable on datasets of
    extremely high dimensionality and have superior
    performance over the state-of-the-art feature selection
    methods.},
  acknowledgement = ack-nhfb,
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@article{Liu:2016:SAU,
  author = {Bin Liu and Yao Wu and Neil Zhenqiang Gong and Junjie
    Wu and Hui Xiong and Martin Ester},
  title = {Structural Analysis of User Choices for Mobile App
    Recommendation},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {2},
  articleno = {17},
  pages = {17:1--17:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2983533},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Advances in smartphone technology have promoted the
    rapid development of mobile apps. However, the
    availability of a huge number of mobile apps in
    application stores has imposed the challenge of finding
    the right apps to meet the user needs. Indeed, there is
    a critical demand for personalized app recommendations.
    Along this line, there are opportunities and challenges
    posed by two unique characteristics of mobile apps.
    First, app markets have organized apps in a
    hierarchical taxonomy. Second, apps with similar
    functionalities are competing with each other. Although
    there are a variety of approaches for mobile app
    recommendations, these approaches do not have a focus
    on dealing with these opportunities and challenges. To
    this end, in this article, we provide a systematic
    study for addressing these challenges. Specifically, we
    develop a structural user choice model (SUCM) to learn
    fine-grained user preferences by exploiting the
    hierarchical taxonomy of apps as well as the
    competitive relationships among apps. Moreover, we
    design an efficient learning algorithm to estimate the
    parameters for the SUCM model. Finally, we perform
    extensive experiments on a large app adoption dataset
    collected from Google Play. The results show that SUCM
    consistently outperforms state-of-the-art Top-N
    recommendation methods by a significant margin.},
}

@article{Afrati:2016:APD,
  author = {Foto Afrati and Shlomi Dolev and Ephraim Korach and
    Shantanu Sharma and Jeffrey D. Ullman},
  title = {Assignment Problems of Different-Sized Inputs in
    {MapReduce}},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {2},
  articleno = {18},
  pages = {18:1--18:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2987376},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {A MapReduce algorithm can be described by a mapping
    schema, which assigns inputs to a set of reducers, such
    that for each required output there exists a reducer
    that receives all the inputs participating in the
    computation of this output. Reducers have a capacity
    that limits the sets of inputs they can be assigned.
    However, individual inputs may vary in terms of size.
    We consider, for the first time, mapping schemas where
    input sizes are part of the considerations and
    restrictions. One of the significant parameters to
    optimize in any MapReduce job is communication cost
    between the map and reduce phases. The communication
    cost can be optimized by minimizing the number of
    copies of inputs sent to the reducers. The
    communication cost is closely related to the number of
    reducers of constrained capacity that are used to
    accommodate appropriately the inputs, so that the
    requirement of how the inputs must meet in a reducer is
    satisfied. In this work, we consider a family of
    problems where it is required that each input meets
    with each other input in at least one reducer. We also
    consider a slightly different family of problems in
    which each input of a list, X, is required to meet each
    input of another list, Y, in at least one reducer. We
    prove that finding an optimal mapping schema for these
    families of problems is NP-hard, and present a
    bin-packing-based approximation algorithm for finding a
    near optimal mapping schema.},
}

@article{Wang:2016:UHM,
  author = {Zhongyuan Wang and Fang Wang and Haixun Wang and
    Zhirui Hu and Jun Yan and Fangtao Li and Ji-Rong Wen
    and Zhoujun Li},
  title = {Unsupervised Head-Modifier Detection in Search
    Queries},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {2},
  articleno = {19},
  pages = {19:1--19:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2988235},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Interpreting the user intent in search queries is a
    key task in query understanding. Query intent
    classification has been widely studied. In this
    article, we go one step further to understand the query
    from the view of head-modifier analysis. For example,
    given the query ``popular iphone 5 smart cover,''
    instead of using coarse-grained semantic classes (e.g.,
    find electronic product), we interpret that ``smart
    cover'' is the head or the intent of the query and
    ``iphone 5'' is its modifier. Query head-modifier
    detection can help search engines to obtain
    particularly relevant content, which is also important
    for applications such as ads matching and query
    recommendation. We introduce an unsupervised semantic
    approach for query head-modifier detection. First, we
    mine a large number of instance level head-modifier
    pairs from search log. Then, we develop a
    conceptualization mechanism to generalize the instance
    level pairs to concept level. Finally, we derive
    weighted concept patterns that are concise, accurate,
    and have strong generalization power in head-modifier
    detection. The developed mechanism has been used in
    production for search relevance and ads matching. We
    use extensive experiment results to demonstrate the
    effectiveness of our approach.},
}

@article{Chang:2016:LMB,
  author = {Yi Chang and Makoto Yamada and Antonio Ortega and Yan
    Liu},
  title = {Lifecycle Modeling for Buzz Temporal Pattern
    Discovery},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {2},
  articleno = {20},
  pages = {20:1--20:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2994605},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {In social media analysis, one critical task is
    detecting a burst of topics or buzz, which is reflected
    by extremely frequent mentions of certain keywords in a
    short-time interval. Detecting buzz not only provides
    useful insights into the information propagation
    mechanism, but also plays an essential role in
    preventing malicious rumors. However, buzz modeling is
    a challenging task because a buzz time-series often
    exhibits sudden spikes and heavy tails, wherein most
    existing time-series models fail. In this article, we
    propose novel buzz modeling approaches that capture the
    rise and fade temporal patterns via Product Lifecycle
    (PLC) model, a classical concept in economics. More
    specifically, we propose to model multiple peaks in
    buzz time-series with PLC mixture or PLC group mixture
    and develop a probabilistic graphical model (K-Mixture
    of Product Lifecycle) (K-MPLC) to automatically
    discover inherent lifecycle patterns within a
    collection of buzzes. Furthermore, we effectively
    utilize the model parameters of PLC mixture or PLC
    group mixture for burst prediction. Our experimental
    results show that our proposed methods significantly
    outperform existing leading approaches on buzz
    clustering and buzz-type prediction.},
}

@article{Wei:2016:NBG,
  author = {Qiang Wei and Dandan Qiao and Jin Zhang and Guoqing
    Chen and Xunhua Guo},
  title = {A Novel Bipartite Graph Based Competitiveness Degree
    Analysis from Query Logs},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {2},
  articleno = {21},
  pages = {21:1--21:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2996196},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Competitiveness degree analysis is a focal point of
    business strategy and competitive intelligence, aimed
    to help managers closely monitor to what extent their
    rivals are competing with them. This article proposes a
    novel method, namely BCQ, to measure the
    competitiveness degree between peers from query logs as
    an important form of user generated contents, which
    reflects the ``wisdom of crowds'' from the search
    engine users' perspective. In doing so, a bipartite
    graph model is developed to capture the competitive
    relationships through conjoint attributes hidden in
    query logs, where the notion of competitiveness degree
    for entity pairs is introduced, and then used to
    identify the competitive paths mapped in the bipartite
    graph. Subsequently, extensive experiments are
    conducted to demonstrate the effectiveness of BCQ to
    quantify the competitiveness degrees. Experimental
    results reveal that BCQ can well support competitors
    ranking, which is helpful for devising competitive
    strategies and pursuing market performance. In
    addition, efficiency experiments on synthetic data show
    a good scalability of BCQ on large scale of query
    logs.},
}

@article{Pei:2016:CCP,
  author = {Yuanli Pei and Xiaoli Z. Fern and Teresa Vania Tjahja
    and R{\'o}mer Rosales},
  title = {Comparing Clustering with Pairwise and Relative
    Constraints: a Unified Framework},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {2},
  articleno = {22},
  pages = {22:1--22:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2996467},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Clustering can be improved with the help of side
    information about the similarity relationships among
    instances. Such information has been commonly
    represented by two types of constraints: pairwise
    constraints and relative constraints, regarding
    similarities about instance pairs and triplets,
    respectively. Prior work has mostly considered these
    two types of constraints separately and developed
    individual algorithms to learn from each type. In
    practice, however, it is critical to understand/compare
    the usefulness of the two types of constraints as well
    as the cost of acquiring them, which has not been
    studied before. This paper provides an extensive
    comparison of clustering with these two types of
    constraints. Specifically, we compare their impacts
    both on human users that provide such constraints and
    on the learning system that incorporates such
    constraints into clustering. In addition, to ensure
    that the comparison of clustering is performed on equal
    ground (without the potential bias introduced by
    different learning algorithms), we propose a
    probabilistic semi-supervised clustering framework that
    can learn from either type of constraints. Our
    experiments demonstrate that the proposed
    semi-supervised clustering framework is highly
    effective at utilizing both types of constraints to aid
    clustering. Our user study provides valuable insights
    regarding the impact of the constraints on human users,
    and our experiments on clustering with the
    human-labeled constraints reveal that relative
    constraint is often more efficient at improving
    clustering.},
}

@article{Lorenzetti:2016:MTS,
  author = {Carlos Lorenzetti and Ana Maguitman and David Leake
    and Filippo Menczer and Thomas Reichherzer},
  title = {Mining for Topics to Suggest Knowledge Model
    Extensions},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {2},
  articleno = {23},
  pages = {23:1--23:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2997657},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {Electronic concept maps, interlinked with other
    concept maps and multimedia resources, can provide rich
    knowledge models to capture and share human knowledge.
    This article presents and evaluates methods to support
    experts as they extend existing knowledge models, by
    suggesting new context-relevant topics mined from Web
    search engines. The task of generating topics to
    support knowledge model extension raises two research
    questions: first, how to extract topic descriptors and
    discriminators from concept maps; and second, how to
    use these topic descriptors and discriminators to
    identify candidate topics on the Web with the right
    balance of novelty and relevance. To address these
    questions, this article first develops the theoretical
    framework required for a ``topic suggester'' to aid
    information search in the context of a knowledge model
    under construction. It then presents and evaluates
    algorithms based on this framework and applied in
    Extender, an implemented tool for topic suggestion.
    Extender has been developed and tested within
    CmapTools, a widely used system for supporting
    knowledge modeling using concept maps. However, the
    generality of the algorithms makes them applicable to a
    broad class of knowledge modeling systems, and to Web
    search in general.},
}

@article{Kumar:2016:ACT,
  author = {Dheeraj Kumar and James C. Bezdek and Sutharshan
    Rajasegarar and Marimuthu Palaniswami and Christopher
    Leckie and Jeffrey Chan and Jayavardhana Gubbi},
  title = {Adaptive Cluster Tendency Visualization and Anomaly
    Detection for Streaming Data},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {2},
  articleno = {24},
  pages = {24:1--24:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2997656},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {The growth in pervasive network infrastructure called
    the Internet of Things (IoT) enables a wide range of
    physical objects and environments to be monitored in
    fine spatial and temporal detail. The detailed, dynamic
    data that are collected in large quantities from sensor
    devices provide the basis for a variety of
    applications. Automatic interpretation of these
    evolving large data is required for timely detection of
    interesting events. This article develops and
    exemplifies two new relatives of the visual assessment
    of tendency (VAT) and improved visual assessment of
    tendency (iVAT) models, which uses cluster heat maps to
    visualize structure in static datasets. One new model
    is initialized with a static VAT/iVAT image, and then
    incrementally (hence inc-VAT/inc-iVAT) updates the
    current minimal spanning tree (MST) used by VAT with an
    efficient edge insertion scheme. Similarly,
    dec-VAT/dec-iVAT efficiently removes a node from the
    current VAT MST. A sequence of inc-iVAT/dec-iVAT images
    can be used for (visual) anomaly detection in evolving
    data streams and for sliding window based cluster
    assessment for time series data. The method is
    illustrated with four real datasets (three of them
    being smart city IoT data). The evaluation demonstrates
    the algorithms' ability to successfully isolate
    anomalies and visualize changing cluster structure in
    the streaming data.},
}

@article{Zhu:2016:EVM,
  author = {Wen-Yuan Zhu and Wen-Chih Peng and Ling-Jyh Chen and
    Kai Zheng and Xiaofang Zhou},
  title = {Exploiting Viral Marketing for Location Promotion in
    Location-Based Social Networks},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {2},
  articleno = {25},
  pages = {25:1--25:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/3001938},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Dec 26 17:17:00 MST 2016},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {With the explosion of smartphones and social network
    services, location-based social networks (LBSNs) are
    increasingly seen as tools for businesses (e.g.,
    restaurants and hotels) to promote their products and
    services. In this article, we investigate the key
    techniques that can help businesses promote their
    locations by advertising wisely through the underlying
    LBSNs. In order to maximize the benefit of location
    promotion, we formalize it as an influence maximization
    problem in an LBSN, i.e., given a target location and
    an LBSN, a set of k users (called seeds) should be
    advertised initially such that they can successfully
    propagate and attract many other users to visit the
    target location. Existing studies have proposed
    different ways to calculate the information propagation
    probability, that is, how likely it is that a user may
    influence another, in the setting of a static social
    network. However, it is more challenging to derive the
    propagation probability in an LBSN since it is heavily
    affected by the target location and the user mobility,
    both of which are dynamic and query dependent. This
    article proposes two user mobility models, namely the
    Gaussian-based and distance-based mobility models, to
    capture the check-in behavior of individual LBSN users,
    based on which location-aware propagation probabilities
    can be derived. Extensive experiments based on two real
    LBSN datasets have demonstrated the superior
    effectiveness of our proposals compared with existing
    static models of propagation probabilities to truly
    reflect the information propagation in LBSNs.},
}

@article{Sariyuce:2017:GMF,
  author = {Ahmet Erdem Sariy{\"u}ce and Kamer Kaya and Erik Saule
    and {\"U}mit V. {\c{C}}ataly{\"u}rek},
  title = {Graph Manipulations for Fast Centrality Computation},
  journal = j-TKDD,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  volume = {11},
  number = {3},
  articleno = {26},
  pages = {26:1--26:??},
  month = apr,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/3022668},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  journal-url = {https://dl.acm.org/loi/tkdd},
  bibdate = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
  abstract = {The betweenness and closeness metrics are widely used
    metrics in many network analysis applications. Yet,
    they are expensive to compute. For that reason, making
    the betweenness and closeness centrality computations
    faster is an important and well-studied problem. In
    this work, we propose the framework BADIOS that
    manipulates the graph by compressing it and splitting
    into pieces so that the centrality computation can be
    handled independently for each piece. Experimental
    results show that the proposed techniques can be a
    great arsenal to reduce the centrality computation time
    for various types and sizes of networks. In particular,
    it reduces the betweenness centrality computation time
    of a 4.6 million edges graph from more than 5 days to
    less than 16 hours. For the same graph, the closeness
    computation time is decreased from more than 3 days to
    6 hours (12.7x speedup).},
}

@Article{Rozenshtein:2017:FDD,
  author =       "Polina Rozenshtein and Nikolaj Tatti and Aristides
                 Gionis",
  title =        "Finding Dynamic Dense Subgraphs",
  journal =      j-TKDD,
  volume =       "11",
  number =       "3",
  pages =        "27:1--27:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3046791",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 24 17:32:52 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Online social networks are often defined by
                 considering interactions of entities at an aggregate
                 level. For example, a call graph is formed among
                 individuals who have called each other at least once;
                 or at least k times. Similarly, in social-media
                 platforms, we consider implicit social networks among
                 users who have interacted in some way, e.g., have made
                 a conversation, have commented to the content of each
                 other, and so on. Such definitions have been used
                 widely in the literature and they have offered
                 significant insights regarding the structure of social
                 networks. However, it is obvious that they suffer from
                 a severe limitation: They neglect the precise time that
                 interactions among the network entities occur. In this
                 article, we consider interaction networks, where the
                 data description contains not only information about
                 the underlying topology of the social network, but also
                 the exact time instances that network entities
                 interact. In an interaction network, an edge is
                 associated with a timestamp, and multiple edges may
                 occur for the same pair of entities. Consequently,
                 interaction networks offer a more fine-grained
                 representation, which can be leveraged to reveal
                 otherwise hidden dynamic phenomena. In the setting of
                 interaction networks, we study the problem of
                 discovering dynamic dense subgraphs whose edges occur
                 in short time intervals. We view such subgraphs as
                 fingerprints of dynamic activity occurring within
                 network communities. Such communities represent groups
                 of individuals who interact with each other in specific
                 time instances, for example, a group of employees who
                 work on a project and whose interaction intensifies
                 before certain project milestones. We prove that the
                 problem we define is NP-hard, and we provide efficient
                 algorithms by adapting techniques for finding dense
                 subgraphs. We also show how to speed-up the proposed
                 methods by exploiting concavity properties of our
                 objective function and by the means of fractional
                 programming. We perform extensive evaluation of the
                 proposed methods on synthetic and real datasets, which
                 demonstrates the validity of our approach and shows
                 that our algorithms can be used to obtain high-quality
                 results.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "27",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD 11(3), April 2017, article 28: buying motives for product bundles.
@article{Liu:2017:MBM,
    author          = {Guannan Liu and Yanjie Fu and Guoqing Chen and Hui
                       Xiong and Can Chen},
    title           = {Modeling Buying Motives for Personalized Product
                       Bundle Recommendation},
    journal         = j-TKDD,
    volume          = {11},
    number          = {3},
    pages           = {28:1--28:??},
    month           = apr,
    year            = {2017},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/3022185},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Product bundling is a marketing strategy that offers
                       several products/items for sale as one bundle. While
                       the bundling strategy has been widely used, less
                       efforts have been made to understand how items should
                       be bundled with respect to consumers' preferences and
                       buying motives for product bundles. This article
                       investigates the relationships between the items that
                       are bought together within a product bundle. To that
                       end, each purchased product bundle is formulated as a
                       bundle graph with items as nodes and the associations
                       between pairs of items in the bundle as edges. The
                       relationships between items can be analyzed by the
                       formation of edges in bundle graphs, which can be
                       attributed to the associations of feature aspects.
                       Then, a probabilistic model BPM (Bundle Purchases with
                       Motives) is proposed to capture the composition of each
                       bundle graph, with two latent factors node-type and
                       edge-type introduced to describe the feature aspects
                       and relationships respectively. Furthermore, based on
                       the preferences inferred from the model, an approach
                       for recommending items to form product bundles is
                       developed by estimating the probability that a consumer
                       would buy an associative item together with the item
                       already bought in the shopping cart. Finally,
                       experimental results on real-world transaction data
                       collected from well-known shopping sites show the
                       effectiveness advantages of the proposed approach over
                       other baseline methods. Moreover, the experiments also
                       show that the proposed model can explain consumers'
                       buying motives for product bundles in terms of
                       different node-types and edge-types.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {28},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd}
}

%%% TKDD 11(3), April 2017, article 29: networked graph clustering.
@article{Guo:2017:CSN,
    author          = {Ting Guo and Jia Wu and Xingquan Zhu and Chengqi
                       Zhang},
    title           = {Combining Structured Node Content and Topology
                       Information for Networked Graph Clustering},
    journal         = j-TKDD,
    volume          = {11},
    number          = {3},
    pages           = {29:1--29:??},
    month           = apr,
    year            = {2017},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2996197},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Graphs are popularly used to represent objects with
                       shared dependency relationships. To date, all existing
                       graph clustering algorithms consider each node as a
                       single attribute or a set of independent attributes,
                       without realizing that content inside each node may
                       also have complex structures. In this article, we
                       formulate a new networked graph clustering task where a
                       network contains a set of inter-connected (or
                       networked) super-nodes, each of which is a
                       single-attribute graph. The new super-node
                       representation is applicable to many real-world
                       applications, such as a citation network where each
                       node denotes a paper whose content can be described as
                       a graph, and citation relationships between papers form
                       a networked graph (i.e., a super-graph). Networked
                       graph clustering aims to find similar node groups, each
                       of which contains nodes with similar content and
                       structure information. The main challenge is to
                       properly calculate the similarity between super-nodes
                       for clustering. To solve the problem, we propose to
                       characterize node similarity by integrating structure
                       and content information of each super-node. To measure
                       node content similarity, we use cosine distance by
                       considering overlapped attributes between two
                       super-nodes. To measure structure similarity, we
                       propose an Attributed Random Walk Kernel (ARWK) to
                       calculate the similarity between super-nodes. Detailed
                       node content analysis is also included to build
                       relationships between super-nodes with shared internal
                       structure information, so the structure similarity can
                       be calculated in a precise way. By integrating the
                       structure similarity and content similarity as one
                       matrix, the spectral clustering is used to achieve
                       networked graph clustering. Our method enjoys sound
                       theoretical properties, including bounded similarities
                       and better structure similarity assessment than
                       traditional graph clustering methods. Experiments on
                       real-world applications demonstrate that our method
                       significantly outperforms baseline approaches.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {29},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd}
}

%%% TKDD 11(3), April 2017, article 30: PageRank as influence propagation.
@article{Liu:2017:IPV,
    author          = {Qi Liu and Biao Xiang and Nicholas Jing Yuan and
                       Enhong Chen and Hui Xiong and Yi Zheng and Yu Yang},
    title           = {An Influence Propagation View of {PageRank}},
    journal         = j-TKDD,
    volume          = {11},
    number          = {3},
    pages           = {30:1--30:??},
    month           = apr,
    year            = {2017},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/3046941},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {For a long time, PageRank has been widely used for
                       authority computation and has been adopted as a solid
                       baseline for evaluating social influence related
                       applications. However, when measuring the authority of
                       network nodes, the traditional PageRank method does not
                       take the nodes' prior knowledge into consideration.
                       Also, the connection between PageRank and social
                       influence modeling methods is not clearly established.
                       To that end, this article provides a focused study on
                       understanding PageRank as well as the relationship
                       between PageRank and social influence analysis. Along
                       this line, we first propose a linear social influence
                       model and reveal that this model generalizes the
                       PageRank-based authority computation by introducing
                       some constraints. Then, we show that the authority
                       computation by PageRank can be enhanced if exploiting
                       more reasonable constraints (e.g., from prior
                       knowledge). Next, to deal with the computational
                       challenge of linear model with general constraints, we
                       provide an upper bound for identifying nodes with top
                       authorities. Moreover, we extend the proposed linear
                       model for better measuring the authority of the given
                       node sets, and we also demonstrate the way to quickly
                       identify the top authoritative node sets. Finally,
                       extensive experimental evaluations on four real-world
                       networks validate the effectiveness of the proposed
                       linear model with respect to different constraint
                       settings. The results show that the methods with more
                       reasonable constraints can lead to better ranking and
                       recommendation performance. Meanwhile, the upper bounds
                       formed by PageRank values could be used to quickly
                       locate the nodes and node sets with the highest
                       authorities.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {30},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd}
}

%%% TKDD 11(3), April 2017, article 31: multi-label diagnosis-code
%%% annotation for ICU patients.  Author names carry no footnote or
%%% corresponding-author markers, so that BibTeX parses surnames cleanly.
@Article{Wang:2017:LMD,
  author =       "Sen Wang and Xue Li and Xiaojun Chang and Lina Yao
                 and Quan Z. Sheng and Guodong Long",
  title =        "Learning Multiple Diagnosis Codes for {ICU} Patients
                 with Local Disease Correlation Mining",
  journal =      j-TKDD,
  volume =       "11",
  number =       "3",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3003729",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 24 17:32:52 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In the era of big data, a mechanism that can
                 automatically annotate disease codes to patients'
                 records in the medical information system is in demand.
                 The purpose of this work is to propose a framework that
                 automatically annotates the disease labels of
                 multi-source patient data in Intensive Care Units
                 (ICUs). We extract features from two main sources,
                 medical charts and notes. The Bag-of-Words model is
                 used to encode the features. Unlike most of the
                 existing multi-label learning algorithms that globally
                 consider correlations between diseases, our model
                 learns disease correlation locally in the patient data.
                 To achieve this, we derive a local disease correlation
                 representation to enrich the discriminant power of each
                 patient data. This representation is embedded into a
                 unified multi-label learning framework. We develop an
                 alternating algorithm to iteratively optimize the
                 objective function. Extensive experiments have been
                 conducted on a real-world ICU database. We have
                 compared our algorithm with representative multi-label
                 learning algorithms. Evaluation results have shown that
                 our proposed method has state-of-the-art performance in
                 the annotation of multiple diagnostic codes for ICU
                 patients. This study suggests that problems in the
                 automated diagnosis code annotation can be reliably
                 addressed by using a multi-label learning model that
                 exploits disease correlation. The findings of this
                 study will greatly benefit health care and management
                 in ICU considering that the automated diagnosis code
                 annotation can significantly improve the quality and
                 management of health care for both patients and
                 caregivers.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "31",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD 11(3), April 2017, article 32: RelaxMap flow-based community
%%% detection.
@article{Bae:2017:SEF,
    author          = {Seung-Hee Bae and Daniel Halperin and Jevin D. West
                       and Martin Rosvall and Bill Howe},
    title           = {Scalable and Efficient Flow-Based Community Detection
                       for Large-Scale Graph Analysis},
    journal         = j-TKDD,
    volume          = {11},
    number          = {3},
    pages           = {32:1--32:??},
    month           = apr,
    year            = {2017},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/2992785},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Community detection is an increasingly popular
                       approach to uncover important structures in large
                       networks. Flow-based community detection methods rely
                       on communication patterns of the network rather than
                       structural properties to determine communities. The
                       Infomap algorithm in particular optimizes a novel
                       objective function called the map equation and has been
                       shown to outperform other approaches in third-party
                       benchmarks. However, Infomap and its variants are
                       inherently sequential, limiting their use for
                       large-scale graphs. In this article, we propose a novel
                       algorithm to optimize the map equation called RelaxMap.
                       RelaxMap provides two important improvements over
                       Infomap: parallelization, so that the map equation can
                       be optimized over much larger graphs, and
                       prioritization, so that the most important work occurs
                       first, iterations take less time, and the algorithm
                       converges faster. We implement these techniques using
                       OpenMP on shared-memory multicore systems, and evaluate
                       our approach on a variety of graphs from standard graph
                       clustering benchmarks as well as real graph datasets.
                       Our evaluation shows that both techniques are
                       effective: RelaxMap achieves 70\% parallel efficiency
                       on eight cores, and prioritization improves algorithm
                       performance by an additional 20--50\% on average,
                       depending on the graph properties. Additionally,
                       RelaxMap converges in the similar number of iterations
                       and provides solutions of equivalent quality as the
                       serial Infomap implementation.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {32},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd}
}

%%% TKDD 11(3), April 2017, article 33: robust graph-regularized NMF.
@article{Peng:2017:RGR,
    author          = {Chong Peng and Zhao Kang and Yunhong Hu and Jie Cheng
                       and Qiang Cheng},
    title           = {Robust Graph Regularized Nonnegative Matrix
                       Factorization for Clustering},
    journal         = j-TKDD,
    volume          = {11},
    number          = {3},
    pages           = {33:1--33:??},
    month           = apr,
    year            = {2017},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/3003730},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Matrix factorization is often used for data
                       representation in many data mining and machine-learning
                       problems. In particular, for a dataset without any
                       negative entries, nonnegative matrix factorization
                       (NMF) is often used to find a low-rank approximation by
                       the product of two nonnegative matrices. With reduced
                       dimensions, these matrices can be effectively used for
                       many applications such as clustering. The existing
                       methods of NMF are often afflicted with their
                       sensitivity to outliers and noise in the data. To
                       mitigate this drawback, in this paper, we consider
                       integrating NMF into a robust principal component
                       model, and design a robust formulation that effectively
                       captures noise and outliers in the approximation while
                       incorporating essential nonlinear structures. A set of
                       comprehensive empirical evaluations in clustering
                       applications demonstrates that the proposed method has
                       strong robustness to gross errors and superior
                       performance to current state-of-the-art methods.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {33},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd}
}

%%% TKDD 11(3), April 2017, article 34: partition-based similarity search.
@article{Tang:2017:PSS,
    author          = {Xun Tang and Maha Alabduljalil and Xin Jin and Tao
                       Yang},
    title           = {Partitioned Similarity Search with Cache-Conscious
                       Data Traversal},
    journal         = j-TKDD,
    volume          = {11},
    number          = {3},
    pages           = {34:1--34:??},
    month           = apr,
    year            = {2017},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/3014060},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {All pairs similarity search (APSS) is used in many web
                       search and data mining applications. Previous work has
                       used techniques such as comparison filtering, inverted
                       indexing, and parallel accumulation of partial results.
                       However, shuffling intermediate results can incur
                       significant communication overhead as data scales up.
                       This paper studies a scalable two-phase approach called
                       Partition-based Similarity Search (PSS). The first
                       phase is to partition the data and group vectors that
                       are potentially similar. The second phase is to run a
                       set of tasks where each task compares a partition of
                       vectors with other candidate partitions. Due to data
                       sparsity and the presence of memory hierarchy,
                       accessing feature vectors during the partition
                       comparison phase incurs significant overhead. This
                       paper introduces a cache-conscious design for data
                       layout and traversal to reduce access time through
                       size-controlled data splitting and vector coalescing,
                       and it provides an analysis to guide the choice of
                       optimization parameters. The evaluation results show
                       that for the tested datasets, the proposed approach can
                       lead to an early elimination of unnecessary I/O and
                       data communication while sustaining parallel efficiency
                       with one order of magnitude of performance improvement
                       and it can also be integrated with LSH for approximated
                       APSS.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {34},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd}
}

%%% TKDD 11(3), April 2017, article 35: latent random walk (LRW)
%%% recommendation.
@article{Feng:2017:RBC,
    author          = {Shanshan Feng and Jian Cao and Jie Wang and Shiyou
                       Qian},
    title           = {Recommendations Based on Comprehensively Exploiting
                       the Latent Factors Hidden in Items' Ratings and
                       Content},
    journal         = j-TKDD,
    volume          = {11},
    number          = {3},
    pages           = {35:1--35:??},
    month           = apr,
    year            = {2017},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/3003728},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {To improve the performance of recommender systems in a
                       practical manner, several hybrid approaches have been
                       developed by considering item ratings and content
                       information simultaneously. However, most of these
                       hybrid approaches make recommendations based on
                       aggregating different recommendation techniques using
                       various strategies, rather than considering joint
                       modeling of the item's ratings and content, and thus
                       fail to detect many latent factors that could
                       potentially improve the performance of the recommender
                       systems. For this reason, these approaches continue to
                       suffer from data sparsity and do not work well for
                       recommending items to individual users. A few studies
                       try to describe a user's preference by detecting items'
                       latent features from content-description texts as
                       compensation for the sparse ratings. Unfortunately,
                       most of these methods are still generally unable to
                       accomplish recommendation tasks well for two reasons:
                       (1) they learn latent factors from text descriptions or
                       user--item ratings independently, rather than combining
                       them together; and (2) influences of latent factors
                       hidden in texts and ratings are not fully explored. In
                       this study, we propose a probabilistic approach that we
                       denote as latent random walk (LRW) based on the
                       combination of an integrated latent topic model and
                       random walk (RW) with the restart method, which can be
                       used to rank items according to expected user
                       preferences by detecting both their explicit and
                       implicit correlative information, in order to recommend
                       top-ranked items to potentially interested users. As
                       presented in this article, the goal of this work is to
                       comprehensively discover latent factors hidden in
                       items' ratings and content in order to alleviate the
                       data sparsity problem and to improve the performance of
                       recommender systems. The proposed topic model provides
                       a generative probabilistic framework that discovers
                       users' implicit preferences and items' latent features
                       simultaneously by exploiting both ratings and item
                       content information. On the basis of this probabilistic
                       framework, RW can predict a user's preference for
                       unrated items by discovering global latent relations.
                       In order to show the efficiency of the proposed
                       approach, we test LRW and other state-of-the-art
                       methods on three real-world datasets, namely,
                       CAMRa2011, Yahoo!, and APP. The experiments indicate
                       that our approach outperforms all comparative methods
                       and, in addition, that it is less sensitive to the data
                       sparsity problem, thus demonstrating the robustness of
                       LRW for recommendation tasks.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {35},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd}
}

%%% TKDD 11(3), April 2017, article 36: spatial prediction for
%%% non-Gaussian data.
@article{Liu:2017:SPM,
    author          = {Xutong Liu and Feng Chen and Yen-Cheng Lu and
                       Chang-Tien Lu},
    title           = {Spatial Prediction for Multivariate Non-{Gaussian}
                       Data},
    journal         = j-TKDD,
    volume          = {11},
    number          = {3},
    pages           = {36:1--36:??},
    month           = apr,
    year            = {2017},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/3022669},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {With the ever increasing volume of geo-referenced
                       datasets, there is a real need for better statistical
                       estimation and prediction techniques for spatial
                       analysis. Most existing approaches focus on predicting
                       multivariate Gaussian spatial processes, but as the
                       data may consist of non-Gaussian (or mixed type)
                       variables, this creates two challenges: (1) how to
                       accurately capture the dependencies among different
                       data types, both Gaussian and non-Gaussian; and (2) how
                       to efficiently predict multivariate non-Gaussian
                       spatial processes. In this article, we propose a
                       generic approach for predicting multiple response
                       variables of mixed types. The proposed approach
                       accurately captures cross-spatial dependencies among
                       response variables and reduces the computational burden
                       by projecting the spatial process to a lower
                       dimensional space with knot-based techniques. Efficient
                       approximations are provided to estimate posterior
                       marginals of latent variables for the predictive
                       process, and extensive experimental evaluations based
                       on both simulation and real-life datasets are provided
                       to demonstrate the effectiveness and efficiency of this
                       new approach.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {36},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd}
}

%%% TKDD 11(3), April 2017, article 37: MGDPre moving destination
%%% prediction.
@article{Wang:2017:MDP,
    author          = {Liang Wang and Zhiwen Yu and Bin Guo and Tao Ku and
                       Fei Yi},
    title           = {Moving Destination Prediction Using Sparse Dataset: a
                       Mobility Gradient Descent Approach},
    journal         = j-TKDD,
    volume          = {11},
    number          = {3},
    pages           = {37:1--37:??},
    month           = apr,
    year            = {2017},
    CODEN           = {????},
    DOI             = {https://doi.org/10.1145/3051128},
    ISSN            = {1556-4681 (print), 1556-472X (electronic)},
    ISSN-L          = {1556-4681},
    bibdate         = {Mon Jul 24 17:32:52 MDT 2017},
    bibsource       = {http://www.acm.org/pubs/contents/journals/tkdd/;
                       http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    abstract        = {Moving destination prediction offers an important
                       category of location-based applications and provides
                       essential intelligence to business and governments. In
                       existing studies, a common approach to destination
                       prediction is to match the given query trajectory with
                       massive recorded trajectories by similarity
                       calculation. Unfortunately, due to privacy concerns,
                       budget constraints, and many other factors, in most
                       circumstances, we can only obtain a sparse trajectory
                       dataset. In sparse dataset, the available moving
                       trajectories are far from enough to cover all possible
                       query trajectories; thus the predictability of the
                       matching-based approach will decrease remarkably.
                       Toward destination prediction with sparse dataset,
                       instead of searching similar trajectories over the
                       sparse records, we alternatively examine the changes of
                       distances from sampling locations to final destination
                       on query trajectory. The underlying idea is intuitive:
                       It is directly motivated by travel purpose, people
                       always get closer to the final destination during the
                       movement. By borrowing the conception of gradient
                       descent in optimization theory, we propose a novel
                       moving destination prediction approach, namely MGDPre.
                       Building upon the mobility gradient descent, MGDPre
                       only investigates the behavior characteristics of query
                       trajectory itself without matching historical
                       trajectories, and thus is applicable for sparse
                       dataset. We evaluate our approach based on extensive
                       experiments, using GPS trajectories generated by a
                       sample of taxis over a 10-day period in Shenzhen city,
                       China. The results demonstrate that the effectiveness,
                       efficiency, and scalability of our approach outperform
                       state-of-the-art baseline methods.},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {37},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-URL     = {https://dl.acm.org/loi/tkdd}
}

@Article{Fountoulakis:2017:RRA,
  author =       "Kimon Fountoulakis and Abhisek Kundu and Eugenia-Maria
                 Kontopoulou and Petros Drineas",
  title =        "A Randomized Rounding Algorithm for Sparse {PCA}",
  journal =      j-TKDD,
  volume =       "11",
  number =       "3",
  pages =        "38:1--38:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3046948",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 24 17:32:52 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tkdd/;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We present and analyze a simple, two-step algorithm to
                 approximate the optimal solution of the sparse PCA
                 problem. In the proposed approach, we first solve an
                 $l_1$-penalized version of the NP-hard sparse PCA
                 optimization problem and then we use a randomized
                 rounding strategy to sparsify the resulting dense
                 solution. Our main theoretical result guarantees an
                 additive error approximation and provides a tradeoff
                 between sparsity and accuracy. Extensive experimental
                 evaluation indicates that the proposed approach is
                 competitive in practice, even compared to
                 state-of-the-art toolboxes such as Spasm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Aggarwal:2017:ISI,
  author =       "Charu C. Aggarwal",
  title =        "Introduction to Special Issue on the Best Papers from
                 {KDD 2016}",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "39:1--39:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092689",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This issue contains the best papers from the ACM KDD
                 Conference 2016. As is customary at KDD, special issue
                 papers are invited only from the research track. The
                 top-ranked papers from the KDD 2016 conference are
                 included in this issue. This issue contains a total of
                 six articles, which are from different areas of data
                 mining. A brief description of these articles is also
                 provided in this article.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "39",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Cheng:2017:RCA,
  author =       "Wei Cheng and Jingchao Ni and Kai Zhang and Haifeng
                 Chen and Guofei Jiang and Yu Shi and Xiang Zhang and
                 Wei Wang",
  title =        "Ranking Causal Anomalies for System Fault Diagnosis
                 via Temporal and Dynamical Analysis on Vanishing
                 Correlations",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "40:1--40:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3046946",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Detecting system anomalies is an important problem in
                 many fields such as security, fault management, and
                 industrial optimization. Recently, invariant network
                 has shown to be powerful in characterizing complex
                 system behaviours. In the invariant network, a node
                 represents a system component and an edge indicates a
                 stable, significant interaction between two components.
                 Structures and evolutions of the invariance network, in
                 particular the vanishing correlations, can shed
                 important light on locating causal anomalies and
                 performing diagnosis. However, existing approaches to
                 detect causal anomalies with the invariant network
                 often use the percentage of vanishing correlations to
                 rank possible causal components, which have several
                 limitations: (1) fault propagation in the network is
                 ignored, (2) the root causal anomalies may not always
                 be the nodes with a high percentage of vanishing
                 correlations, (3) temporal patterns of vanishing
                 correlations are not exploited for robust detection,
                 and (4) prior knowledge on anomalous nodes are not
                 exploited for (semi-)supervised detection. To address
                 these limitations, in this article we propose a network
                 diffusion based framework to identify significant
                 causal anomalies and rank them. Our approach can
                 effectively model fault propagation over the entire
                 invariant network and can perform joint inference on
                 both the structural and the time-evolving broken
                 invariance patterns. As a result, it can locate
                 high-confidence anomalies that are truly responsible
                 for the vanishing correlations and can compensate for
                 unstructured measurement noise in the system. Moreover,
                 when the prior knowledge on the anomalous status of
                 some nodes are available at certain time points, our
                 approach is able to leverage them to further enhance
                 the anomaly inference accuracy. When the prior
                 knowledge is noisy, our approach also automatically
                 learns reliable information and reduces impacts from
                 noises. By performing extensive experiments on
                 synthetic datasets, bank information system datasets,
                 and coal plant cyber-physical system datasets, we
                 demonstrate the effectiveness of our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "40",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2017:CDM,
  author =       "Tianyang Zhang and Peng Cui and Christos Faloutsos and
                 Yunfei Lu and Hao Ye and Wenwu Zhu and Shiqiang Yang",
  title =        "{comeNgo}: a Dynamic Model for Social Group
                 Evolution",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "41:1--41:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3059214",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "How do social groups, such as Facebook groups and
                 Wechat groups, dynamically evolve over time? How do
                 people join the social groups, uniformly or with burst?
                 What is the pattern of people quitting from groups? Is
                 there a simple universal model to depict the
                 come-and-go patterns of various groups? In this
                 article, we examine temporal evolution patterns of more
                 than 100 thousand social groups with more than 10
                 million users. We surprisingly find that the evolution
                 patterns of real social groups go far beyond the
                 classic dynamic models like SI and SIR. For example, we
                 observe both diffusion and non-diffusion mechanism in
                 the group joining process, and power-law decay in group
                 quitting process, rather than exponential decay as
                 expected in SIR model. Therefore, we propose a new
                 model comeNgo, a concise yet flexible dynamic model for
                 group evolution. Our model has the following
                 advantages: (a) Unification power: it generalizes
                 earlier theoretical models and different joining and
                 quitting mechanisms we find from observation. (b)
                 Succinctness and interpretability: it contains only six
                 parameters with clear physical meanings. (c) Accuracy:
                 it can capture various kinds of group evolution
                 patterns precisely, and the goodness of fit increases
                 by 58\% over baseline. (d) Usefulness: it can be used
                 in multiple application scenarios, such as forecasting
                 and pattern discovery. Furthermore, our model can
                 provide insights about different evolution patterns of
                 social groups, and we also find that group structure
                 and its evolution has notable relations with temporal
                 patterns of group evolution.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "41",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2017:CDI,
  author =       "Chen Chen and Hanghang Tong and Lei Xie and Lei Ying
                 and Qing He",
  title =        "Cross-Dependency Inference in Multi-Layered Networks:
                 a Collaborative Filtering Perspective",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "42:1--42:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3056562",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The increasingly connected world has catalyzed the
                 fusion of networks from different domains, which
                 facilitates the emergence of a new network
                 model --- multi-layered networks. Examples of such kind of
                 network systems include critical infrastructure
                 networks, biological systems, organization-level
                 collaborations, cross-platform e-commerce, and so
                 forth. One crucial structure that distances
                 multi-layered network from other network models is its
                 cross-layer dependency, which describes the
                 associations between the nodes from different layers.
                 Needless to say, the cross-layer dependency in the
                 network plays an essential role in many data mining
                 applications like system robustness analysis and
                 complex network control. However, it remains a daunting
                 task to know the exact dependency relationships due to
                 noise, limited accessibility, and so forth. In this
                 article, we tackle the cross-layer dependency inference
                 problem by modeling it as a collective collaborative
                 filtering problem. Based on this idea, we propose an
                 effective algorithm Fascinate that can reveal
                 unobserved dependencies with linear complexity.
                 Moreover, we derive Fascinate-ZERO, an online variant
                 of Fascinate that can respond to a newly added node
                 timely by checking its neighborhood dependencies. We
                 perform extensive evaluations on real datasets to
                 substantiate the superiority of our proposed
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "42",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{DeStefani:2017:TCL,
  author =       "Lorenzo {De Stefani} and Alessandro Epasto and Matteo
                 Riondato and Eli Upfal",
  title =        "{TRI{\`E}ST}: Counting Local and Global Triangles in
                 Fully Dynamic Streams with Fixed Memory Size",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "43:1--43:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3059194",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "``Ogni lassada xe persa.'' --- Proverb from
                 Trieste, Italy. We present tri{\`e}st, a suite of
                 one-pass streaming algorithms to compute unbiased,
                 low-variance, high-quality approximations of the global
                 and local (i.e., incident to each vertex) number of
                 triangles in a fully dynamic graph represented as an
                 adversarial stream of edge insertions and deletions.
                 Our algorithms use reservoir sampling and its variants
                 to exploit the user-specified memory space at all
                 times. This is in contrast with previous approaches,
                 which require hard-to-choose parameters (e.g., a fixed
                 sampling probability) and offer no guarantees on the
                 amount of memory they use. We analyze the variance of
                 the estimations and show novel concentration bounds for
                 these quantities. Our experimental results on very
                 large graphs demonstrate that tri{\`e}st outperforms
                 state-of-the-art approaches in accuracy and exhibits a
                 small update time.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "43",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Hooi:2017:GBF,
  author =       "Bryan Hooi and Kijung Shin and Hyun Ah Song and Alex
                 Beutel and Neil Shah and Christos Faloutsos",
  title =        "Graph-Based Fraud Detection in the Face of
                 Camouflage",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "44:1--44:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3056563",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a bipartite graph of users and the products that
                 they review, or followers and followees, how can we
                 detect fake reviews or follows? Existing fraud
                 detection methods (spectral, etc.) try to identify
                 dense subgraphs of nodes that are sparsely connected to
                 the remaining graph. Fraudsters can evade these methods
                 using camouflage, by adding reviews or follows with
                 honest targets so that they look ``normal.'' Even
                 worse, some fraudsters use hijacked accounts from
                 honest users, and then the camouflage is indeed
                 organic. Our focus is to spot fraudsters in the
                 presence of camouflage or hijacked accounts. We propose
                 FRAUDAR, an algorithm that (a) is camouflage resistant,
                 (b) provides upper bounds on the effectiveness of
                 fraudsters, and (c) is effective in real-world data.
                 Experimental results under various attacks show that
                 FRAUDAR outperforms the top competitor in accuracy of
                 detecting both camouflaged and non-camouflaged fraud.
                 Additionally, in real-world experiments with a Twitter
                 follower--followee graph of 1.47 billion edges, FRAUDAR
                 successfully detected a subgraph of more than 4,000
                 detected accounts, of which a majority had tweets
                 showing that they used follower-buying services.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Anderson:2017:AHE,
  author =       "Ashton Anderson and Jon Kleinberg and Sendhil
                 Mullainathan",
  title =        "Assessing Human Error Against a Benchmark of
                 Perfection",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "45:1--45:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3046947",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "An increasing number of domains are providing us with
                 detailed trace data on human decisions in settings
                 where we can evaluate the quality of these decisions
                 via an algorithm. Motivated by this development, an
                 emerging line of work has begun to consider whether we
                 can characterize and predict the kinds of decisions
                 where people are likely to make errors. To investigate
                 what a general framework for human error prediction
                 might look like, we focus on a model system with a rich
                 history in the behavioral sciences: the decisions made
                 by chess players as they select moves in a game. We
                 carry out our analysis at a large scale, employing
                 datasets with several million recorded games, and using
                 chess tablebases to acquire a form of ground truth for
                 a subset of chess positions that have been completely
                 solved by computers but remain challenging for even the
                 best players in the world. We organize our analysis
                 around three categories of features that we argue are
                 present in most settings where the analysis of human
                 error is applicable: the skill of the decision-maker,
                 the time available to make the decision, and the
                 inherent difficulty of the decision. We identify rich
                 structure in all three of these categories of features,
                 and find strong evidence that in our domain, features
                 describing the inherent difficulty of an instance are
                 significantly more powerful than features based on
                 skill or time.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "45",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2017:DCM,
  author =       "Yihan Wang and Shaoxu Song and Lei Chen and Jeffrey Xu
                 Yu and Hong Cheng",
  title =        "Discovering Conditional Matching Rules",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "46:1--46:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070647",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Matching dependencies (MDs) have recently been
                 proposed to make data dependencies tolerant to various
                 information representations, and found useful in data
                 quality applications such as record matching. Instead
                 of the strict equality function used in traditional
                 dependency syntax (e.g., functional dependencies), MDs
                 specify constraints based on similarity and
                 identification. However, in practice, MDs may still be
                 too strict and applicable only in a subset of tuples in
                 a relation. Thereby, we study the conditional matching
                 dependencies (CMDs), which bind matching dependencies
                 only in a certain part of a table, i.e., MDs
                 conditionally applicable in a subset of tuples.
                 Compared to MDs, CMDs have more expressive power that
                 enables them to satisfy wider application needs. In
                 this article, we study several important theoretical
                 and practical issues of CMDs, including irreducible
                 CMDs with respect to the implication, discovery of CMDs
                 from data, reliable CMDs agreed most by a relation,
                 approximate CMDs almost satisfied in a relation, and
                 finally applications of CMDs in record matching and
                 missing value repairing. Through an extensive
                 experimental evaluation in real data sets, we
                 demonstrate the efficiency of proposed CMDs discovery
                 algorithms and effectiveness of CMDs in real
                 applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "46",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Anagnostopoulos:2017:QDL,
  author =       "Christos Anagnostopoulos and Peter Triantafillou",
  title =        "Query-Driven Learning for Predictive Analytics of Data
                 Subspace Cardinality",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "47:1--47:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3059177",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Fundamental to many predictive analytics tasks is the
                 ability to estimate the cardinality (number of data
                 items) of multi-dimensional data subspaces, defined by
                 query selections over datasets. This is crucial for
                 data analysts dealing with, e.g., interactive data
                 subspace explorations, data subspace visualizations,
                 and in query processing optimization. However, in many
                 modern data systems, predictive analytics may be (i)
                 too costly money-wise, e.g., in clouds, (ii)
                 unreliable, e.g., in modern Big Data query engines,
                 where accurate statistics are difficult to
                 obtain/maintain, or (iii) infeasible, e.g., for privacy
                 issues. We contribute a novel, query-driven, function
                 estimation model of analyst-defined data subspace
                 cardinality. The proposed estimation model is highly
                 accurate in terms of prediction and accommodating the
                 well-known selection queries: multi-dimensional range
                 and distance-nearest neighbors (radius) queries. Our
                 function estimation model: (i) quantizes the vectorial
                 query space, by learning the analysts' access patterns
                 over a data space, (ii) associates query vectors with
                 their corresponding cardinalities of the
                 analyst-defined data subspaces, (iii) abstracts and
                 employs query vectorial similarity to predict the
                 cardinality of an unseen/unexplored data subspace, and
                 (iv) identifies and adapts to possible changes of the
                 query subspaces based on the theory of optimal
                 stopping. The proposed model is decentralized,
                 facilitating the scaling-out of such predictive
                 analytics queries. The research significance of the
                 model lies in that (i) it is an attractive solution
                 when data-driven statistical techniques are undesirable
                 or infeasible, (ii) it offers a scale-out,
                 decentralized training solution, (iii) it is applicable
                 to different selection query types, and (iv) it offers
                 a performance that is superior to that of data-driven
                 approaches.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "47",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wu:2017:LSO,
  author =       "Yue Wu and Steven C. H. Hoi and Tao Mei and Nenghai
                 Yu",
  title =        "Large-Scale Online Feature Selection for Ultra-High
                 Dimensional Sparse Data",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "48:1--48:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070646",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Feature selection (FS) is an important technique in
                 machine learning and data mining, especially for
                 large-scale high-dimensional data. Most existing
                 studies have been restricted to batch learning, which
                 is often inefficient and poorly scalable when handling
                 big data in real world. As real data may arrive
                 sequentially and continuously, batch learning has to
                 retrain the model for the new coming data, which is
                 very computationally intensive. Online feature
                 selection (OFS) is a promising new paradigm that is
                 more efficient and scalable than batch learning
                 algorithms. However, existing online algorithms usually
                 fall short in their inferior efficacy. In this article,
                 we present a novel second-order OFS algorithm that is
                 simple yet effective, very fast and extremely scalable
                 to deal with large-scale ultra-high dimensional sparse
                 data streams. The basic idea is to exploit the
                 second-order information to choose the subset of
                 important features with high confidence weights. Unlike
                 existing OFS methods that often suffer from extra high
                 computational cost, we devise a novel algorithm with a
                 MaxHeap-based approach, which is not only more
                 effective than the existing first-order algorithms, but
                 also significantly more efficient and scalable. Our
                 extensive experiments validated that the proposed
                 technique achieves highly competitive accuracy as
                 compared with state-of-the-art batch FS methods,
                 meanwhile it consumes significantly less computational
                 cost that is orders of magnitude lower. Impressively,
                 on a billion-scale synthetic dataset (1-billion
                 dimensions, 1-billion non-zero features, and 1-million
                 samples), the proposed algorithm takes less than 3
                 minutes to run on a single PC.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "48",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Costa:2017:MTA,
  author =       "Alceu Ferraz Costa and Yuto Yamaguchi and Agma Juci
                 Machado Traina and Caetano {Traina Jr.} and Christos
                 Faloutsos",
  title =        "Modeling Temporal Activity to Detect Anomalous
                 Behavior in Social Media",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "49:1--49:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3064884",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Social media has become a popular and important tool
                 for human communication. However, due to this
                 popularity, spam and the distribution of malicious
                 content by computer-controlled users, known as bots,
                 has become a widespread problem. At the same time, when
                 users use social media, they generate valuable data
                 that can be used to understand the patterns of human
                 communication. In this article, we focus on the
                 following important question: Can we identify and use
                 patterns of human communication to decide whether a
                 human or a bot controls a user? The first contribution
                 of this article is showing that the distribution of
                 inter-arrival times (IATs) between postings is
                 characterized by following four patterns: (i)
                 heavy-tails, (ii) periodic-spikes, (iii) correlation
                 between consecutive values, and (iv) bimodality. As
                 our second contribution, we propose a mathematical
                 model named Act-M (Activity Model). We show that Act-M
                 can accurately fit the distribution of IATs from social
                 media users. Finally, we use Act-M to develop a method
                 that detects if users are bots based only on the timing
                 of their postings. We validate Act-M using data from
                 over 55 million postings from four social media
                 services: Reddit, Twitter, Stack-Overflow, and
                 Hacker-News. Our experiments show that Act-M provides a
                 more accurate fit to the data than existing models for
                 human dynamics. Additionally, when detecting bots,
                 Act-M provided a precision higher than 93\% and 77\%
                 with a sensitivity of 70\% for the Twitter and Reddit
                 datasets, respectively.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "49",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Vosoughi:2017:RGP,
  author =       "Soroush Vosoughi and Mostafa `Neo' Mohsenvand and Deb
                 Roy",
  title =        "Rumor Gauge: Predicting the Veracity of Rumors on
                 {Twitter}",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "50:1--50:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070644",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The spread of malicious or accidental misinformation
                 in social media, especially in time-sensitive
                 situations, such as real-world emergencies, can have
                 harmful effects on individuals and society. In this
                 work, we developed models for automated verification of
                 rumors (unverified information) that propagate through
                 Twitter. To predict the veracity of rumors, we
                 identified salient features of rumors by examining
                 three aspects of information spread: linguistic style
                 used to express rumors, characteristics of people
                 involved in propagating information, and network
                 propagation dynamics. The predicted veracity of a time
                 series of these features extracted from a rumor (a
                 collection of tweets) is generated using Hidden Markov
                 Models. The verification algorithm was trained and
                 tested on 209 rumors representing 938,806 tweets
                 collected from real-world events, including the 2013
                 Boston Marathon bombings, the 2014 Ferguson unrest, and
                 the 2014 Ebola epidemic, and many other rumors about
                 various real-world events reported on popular websites
                 that document public rumors. The algorithm was able to
                 correctly predict the veracity of 75\% of the rumors
                 faster than any other public source, including
                 journalists and law enforcement officials. The ability
                 to track rumors and predict their outcomes may have
                 practical applications for news consumers, financial
                 markets, journalists, and emergency services, and more
                 generally to help minimize the impact of false
                 information on Twitter.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "50",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Boutemine:2017:MCS,
  author =       "Oualid Boutemine and Mohamed Bouguessa",
  title =        "Mining Community Structures in Multidimensional
                 Networks",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "51:1--51:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3080574",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We investigate the problem of community detection in
                 multidimensional networks, that is, networks where
                 entities engage in various interaction types
                 (dimensions) simultaneously. While some approaches have
                 been proposed to identify community structures in
                 multidimensional networks, there are a number of
                 problems still to solve. In fact, the majority of the
                 proposed approaches suffer from one or even more of the
                 following limitations: (1) difficulty detecting
                 communities in networks characterized by the presence
                 of many irrelevant dimensions, (2) lack of systematic
                 procedures to explicitly identify the relevant
                 dimensions of each community, and (3) dependence on a
                 set of user-supplied parameters, including the number
                 of communities, that require a proper tuning. Most of
                 the existing approaches are inadequate for dealing with
                 these three issues in a unified framework. In this
                 paper, we develop a novel approach that is capable of
                 addressing the aforementioned limitations in a single
                 framework. The proposed approach allows automated
                 identification of communities and their sub-dimensional
                 spaces using a novel objective function and a
                 constrained label propagation-based optimization
                 strategy. By leveraging the relevance of dimensions at
                 the node level, the strategy aims to maximize the
                 number of relevant within-community links while keeping
                 track of the most relevant dimensions. A notable
                 feature of the proposed approach is that it is able to
                 automatically identify low dimensional community
                 structures embedded in a high dimensional space.
                 Experiments on synthetic and real multidimensional
                 networks illustrate the suitability of the new
                 method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "51",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Algizawy:2017:RTL,
  author =       "Essam Algizawy and Tetsuji Ogawa and Ahmed El-Mahdy",
  title =        "Real-Time Large-Scale Map Matching Using Mobile Phone
                 Data",
  journal =      j-TKDD,
  volume =       "11",
  number =       "4",
  pages =        "52:1--52:??",
  month =        aug,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3046945",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 22 09:23:44 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "With the widespread use of mobile phones, cellular
                 mobile big data is becoming an important resource that
                 provides a wealth of information with almost no cost.
                 However, the data generally suffers from relatively
                 high spatial granularity, limiting the scope of its
                 application. In this article, we consider, for the
                 first time, the utility of actual mobile big data for
                 map matching allowing for ``microscopic'' level traffic
                 analysis. The state-of-the-art in map matching
                 generally targets GPS data, which provides far denser
                 sampling and higher location resolution than the mobile
                 data. Our approach extends the typical Hidden-Markov
                 model used in map matching to accommodate for highly
                 sparse location trajectories, exploit the large mobile
                 data volume to learn the model parameters, and exploit
                 the sparsity of the data to provide for real-time
                 Viterbi processing. We study an actual, anonymised
                 mobile trajectories data set of the city of Dakar,
                 Senegal, spanning a year, and generate a corresponding
                 road-level traffic density, at an hourly granularity,
                 for each mobile trajectory. We observed a relatively
                 high correlation between the generated traffic
                 intensities and corresponding values obtained by the
                 gravity and equilibrium models typically used in
                 mobility analysis, indicating the utility of the
                 approach as an alternative means for traffic
                 analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "52",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{vanLeeuwen:2018:ETS,
  author =       "Matthijs van Leeuwen and Polo Chau and Jilles Vreeken
                 and Dafna Shahaf and Christos Faloutsos",
  title =        "Editorial: {TKDD} Special Issue on Interactive Data
                 Exploration and Analytics",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3181707",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Rayar:2018:VIS,
  author =       "Fr{\'e}d{\'e}ric Rayar and Sabine Barrat and Fatma
                 Bouali and Gilles Venturini",
  title =        "A Viewable Indexing Structure for the Interactive
                 Exploration of Dynamic and Large Image Collections",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047011",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Thanks to the capturing devices cost reduction and the
                 advent of social networks, the size of image
                 collections is becoming extremely huge. Many works in
                 the literature have addressed the indexing of large
                 image collections for search purposes. However, there
                 is a lack of support for exploratory data mining. One
                 may want to wander around the images and experience
                 serendipity in the exploration process. Thus, effective
                 paradigms not only for organising, but also visualising
                 these image collections become necessary. In this
                 article, we present a study to jointly index and
                 visualise large image collections. The work focuses on
                 satisfying three constraints. First, large image
                 collections, up to millions of images, shall be handled.
                 Second, dynamic collections, such as ever-growing
                 collections, shall be processed in an incremental way,
                 without reprocessing the whole collection at each
                 modification. Finally, an intuitive and interactive
                 exploration system shall be provided to the user to
                 allow him to easily mine image collections. To this
                 end, a data partitioning algorithm has been modified
                 and proximity graphs have been used to fit the
                 visualisation purpose. A custom web platform has been
                 implemented to visualise the hierarchical and
                 graph-based hybrid structure. The results of a user
                 evaluation we have conducted show that the exploration
                 of the collections is intuitive and smooth thanks to
                 the proposed structure. Furthermore, the scalability of
                 the proposed indexing method is proved using large
                 public image collections.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Makki:2018:AVV,
  author =       "Raheleh Makki and Eder Carvalho and Axel J. Soto and
                 Stephen Brooks and Maria Cristina Ferreira {De
                 Oliveira} and Evangelos Milios and Rosane Minghim",
  title =        "{ATR-Vis}: Visual and Interactive Information
                 Retrieval for Parliamentary Discussions in {Twitter}",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047010",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The worldwide adoption of Twitter turned it into one
                 of the most popular platforms for content analysis as
                 it serves as a gauge of the public's feeling and
                 opinion on a variety of topics. This is particularly
                 true of political discussions and lawmakers' actions
                 and initiatives. Yet, one common but unrealistic
                 assumption is that the data of interest for analysis is
                 readily available in a comprehensive and accurate form.
                 Data need to be retrieved, but due to the brevity and
                 noisy nature of Twitter content, it is difficult to
                 formulate user queries that match relevant posts that
                 use different terminology without introducing a
                 considerable volume of unwanted content. This problem
                 is aggravated when the analysis must contemplate
                 multiple and related topics of interest, for which
                 comments are being concurrently posted. This article
                 presents Active Tweet Retrieval Visualization
                 (ATR-Vis), a user-driven visual approach for the
                 retrieval of Twitter content applicable to this
                 scenario. The method proposes a set of active retrieval
                 strategies to involve an analyst in such a way that a
                 major improvement in retrieval coverage and precision
                 is attained with minimal user effort. ATR-Vis enables
                 non-technical users to benefit from the aforementioned
                 active learning strategies by providing visual aids to
                 facilitate the requested supervision. This supports the
                 exploration of the space of potentially relevant
                 tweets, and affords a better understanding of the
                 retrieval results. We evaluate our approach in
                 scenarios in which the task is to retrieve tweets
                 related to multiple parliamentary debates within a
                 specific time span. We collected two Twitter datasets,
                 one associated with debates in the Canadian House of
                 Commons during a particular week in May 2014, and
                 another associated with debates in the Brazilian
                 Federal Senate during a selected week in May 2015. The
                 two use cases illustrate the effectiveness of ATR-Vis
                 for the retrieval of relevant tweets, while
                 quantitative results show that our approach achieves
                 high retrieval quality with a modest amount of
                 supervision. Finally, we evaluated our tool with three
                 external users who perform searching in social media as
                 part of their professional work.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Lim:2018:MEA,
  author =       "Yongsub Lim and Minsoo Jung and U. Kang",
  title =        "Memory-Efficient and Accurate Sampling for Counting
                 Local Triangles in Graph Streams: From Simple to
                 Multigraphs",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "4:1--4:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3022186",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "How can we estimate local triangle counts accurately
                 in a graph stream without storing the whole graph? How
                 to handle duplicated edges in local triangle counting
                 for graph stream? Local triangle counting, which
                 computes the number of triangles attached to each node
                 in a graph, is a very important problem with wide
                 applications in social network analysis, anomaly
                 detection, web mining, and the like. In this article,
                 we propose algorithms for local triangle counting in a
                 graph stream based on edge sampling: Mascot for a
                 simple graph, and MultiBMascot and MultiWMascot for a
                 multigraph. To develop Mascot, we first present two
                 naive local triangle counting algorithms in a graph
                 stream, called Mascot-C and Mascot-A. Mascot-C is based
                 on constant edge sampling, and Mascot-A improves its
                 accuracy by utilizing more memory spaces. Mascot
                 achieves both accuracy and memory-efficiency of the two
                 algorithms by unconditional triangle counting for a new
                 edge, regardless of whether it is sampled or not.
                 Extending the idea to a multigraph, we develop two
                 algorithms MultiBMascot and MultiWMascot. MultiBMascot
                 enables local triangle counting on the corresponding
                 simple graph of a streamed multigraph without explicit
                 graph conversion; MultiWMascot considers repeated
                 occurrences of an edge as its weight and counts each
                 triangle as the product of its three edge weights. In
                 contrast to the existing algorithm that requires prior
                 knowledge on the target graph and appropriately set
                 parameters, our proposed algorithms require only one
                 parameter of edge sampling probability. Through
                 extensive experiments, we show that for the same number
                 of edges sampled, Mascot provides the best accuracy
                 compared to the existing algorithm as well as Mascot-C
                 and Mascot-A. We also demonstrate that MultiBMascot on
                 a multigraph is comparable to Mascot-C on the
                 counterpart simple graph, and MultiWMascot becomes more
                 accurate for higher degree nodes. Thanks to Mascot, we
                 also discover interesting anomalous patterns in real
                 graphs, including core-peripheries in the web, a
                 bimodal call pattern in a phone call history, and
                 intensive collaboration in DBLP.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Shi:2018:VAB,
  author =       "Lei Shi and Hanghang Tong and Madelaine Daianu and
                 Feng Tian and Paul M. Thompson",
  title =        "Visual Analysis of Brain Networks Using Sparse
                 Regression Models",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "5:1--5:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3023363",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Studies of the human brain network are becoming
                 increasingly popular in the fields of neuroscience,
                 computer science, and neurology. Despite this rapidly
                 growing line of research, gaps remain on the
                 intersection of data analytics, interactive visual
                 representation, and the human intelligence---all needed
                 to advance our understanding of human brain networks.
                 This article tackles this challenge by exploring the
                 design space of visual analytics. We propose an
                 integrated framework to orchestrate computational
                 models with comprehensive data visualizations on the
                 human brain network. The framework targets two
                 fundamental tasks: the visual exploration of
                 multi-label brain networks and the visual comparison
                 among brain networks across different subject groups.
                 During the first task, we propose a novel interactive
                 user interface to visualize sets of labeled brain
                 networks; in our second task, we introduce sparse
                 regression models to select discriminative features
                 from the brain network to facilitate the comparison.
                 Through user studies and quantitative experiments, both
                 methods are shown to greatly improve the visual
                 comparison performance. Finally, real-world case
                 studies with domain experts demonstrate the utility and
                 effectiveness of our framework to analyze
                 reconstructions of human brain connectivity maps. The
                 perceptually optimized visualization design and the
                 feature selection model calibration are shown to be the
                 key to our significant findings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Galbrun:2018:MRS,
  author =       "Esther Galbrun and Pauli Miettinen",
  title =        "Mining Redescriptions with {Siren}",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "6:1--6:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3007212",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In many areas of science, scientists need to find
                 distinct common characterizations of the same objects
                 and, vice versa, to identify sets of objects that admit
                 multiple shared descriptions. For example, in biology,
                 an important task is to identify the bioclimatic
                 constraints that allow some species to survive, that
                 is, to describe geographical regions both in terms of
                 the fauna that inhabits them and of their bioclimatic
                 conditions. In data analysis, the task of automatically
                 generating such alternative characterizations is called
                 redescription mining. If a domain expert wants to use
                 redescription mining in his research, merely being able
                 to find redescriptions is not enough. He must also be
                 able to understand the redescriptions found, adjust
                 them to better match his domain knowledge, test
                 alternative hypotheses with them, and guide the mining
                 process toward results he considers interesting. To
                 facilitate these goals, we introduce Siren, an
                 interactive tool for mining and visualizing
                 redescriptions. Siren allows to obtain redescriptions
                 in an anytime fashion through efficient, distributed
                 mining, to examine the results in various linked
                 visualizations, to interact with the results either
                 directly or via the visualizations, and to guide the
                 mining algorithm toward specific redescriptions. In
                 this article, we explain the features of Siren and why
                 they are useful for redescription mining. We also
                 propose two novel redescription mining algorithms that
                 improve the generalizability of the results compared to
                 the existing ones.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wu:2018:IDC,
  author =       "Hao Wu and Maoyuan Sun and Peng Mi and Nikolaj Tatti
                 and Chris North and Naren Ramakrishnan",
  title =        "Interactive Discovery of Coordinated Relationship
                 Chains with Maximum Entropy Models",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "7:1--7:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3047017",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Modern visual analytic tools promote human-in-the-loop
                 analysis but are limited in their ability to direct the
                 user toward interesting and promising directions of
                 study. This problem is especially acute when the
                 analysis task is exploratory in nature, e.g., the
                 discovery of potentially coordinated relationships in
                 massive text datasets. Such tasks are very common in
                 domains like intelligence analysis and security
                 forensics where the goal is to uncover surprising
                 coalitions bridging multiple types of relations. We
                 introduce new maximum entropy models to discover
                 surprising chains of relationships leveraging count
                 data about entity occurrences in documents. These
                 models are embedded in a visual analytic system called
                 MERCER (Maximum Entropy Relational Chain ExploRer) that
                 treats relationship bundles as first class objects and
                 directs the user toward promising lines of inquiry. We
                 demonstrate how user input can judiciously direct
                 analysis toward valid conclusions, whereas a purely
                 algorithmic approach could be led astray. Experimental
                 results on both synthetic and real datasets from the
                 intelligence community are presented.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Choo:2018:VVA,
  author =       "Jaegul Choo and Hannah Kim and Edward Clarkson and
                 Zhicheng Liu and Changhyun Lee and Fuxin Li and
                 Hanseung Lee and Ramakrishnan Kannan and Charles D.
                 Stolper and John Stasko and Haesun Park",
  title =        "{VisIRR}: a Visual Analytics System for Information
                 Retrieval and Recommendation for Large-Scale Document
                 Data",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "8:1--8:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070616",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In this article, we present an interactive visual
                 information retrieval and recommendation system, called
                 VisIRR, for large-scale document discovery. VisIRR
                 effectively combines the paradigms of (1) a passive
                 pull through query processes for retrieval and (2) an
                 active push that recommends items of potential interest
                 to users based on their preferences. Equipped with an
                 efficient dynamic query interface against a large-scale
                 corpus, VisIRR organizes the retrieved documents into
                 high-level topics and visualizes them in a 2D space,
                 representing the relationships among the topics along
                 with their keyword summary. In addition, based on
                 interactive personalized preference feedback with
                 regard to documents, VisIRR provides document
                 recommendations from the entire corpus, which are
                 beyond the retrieved sets. Such recommended documents
                 are visualized in the same space as the retrieved
                 documents, so that users can seamlessly analyze both
                 existing and newly recommended ones. This article
                 presents novel computational methods, which make these
                 integrated representations and fast interactions
                 possible for a large-scale document corpus. We
                 illustrate how the system works by providing detailed
                 usage scenarios. Additionally, we present preliminary
                 user study results for evaluating the effectiveness of
                 the system.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% Kamat and Nandi, TKDD 12(1), article 9 (February 2018).
@Article{Kamat:2018:SBA,
  author =       "Niranjan Kamat and Arnab Nandi",
  title =        "A Session-Based Approach to Fast-But-Approximate
                 Interactive Data Cube Exploration",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "9:1--9:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070648",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "With the proliferation of large datasets, sampling has
                 become pervasive in data analysis. Sampling has
                 numerous benefits --- from reducing the computation time
                 and cost to increasing the scope of interactive
                 analysis. A popular task in data science, well-suited
                 toward sampling, is the computation of
                 fast-but-approximate aggregations over sampled data.
                 Aggregation is a foundational block of data analysis,
                 with data cube being its primary construct. We observe
                 that such aggregation queries are typically issued in
                 an ad-hoc, interactive setting. In contrast to one-off
                 queries, a typical query session consists of a series
                 of quick queries, interspersed with the user inspecting
                 the results and formulating the next query. The
                 similarity between session queries opens up
                 opportunities for reusing computation of not just query
                 results, but also error estimates. Error estimates need
                 to be provided alongside sampled results for the
                 results to be meaningful. We propose Sesame, a rewrite
                 and caching framework that accelerates the entire
                 interactive session of aggregation queries over sampled
                 data. We focus on two unique and computationally
                 expensive aspects of this use case: query speculation
                 in the presence of sampling, and error computation, and
                 provide novel strategies for result and error reuse. We
                 demonstrate that our approach outperforms conventional
                 sampled aggregation techniques by at least an order of
                 magnitude, without modifying the underlying database.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% Senin et al., TKDD 12(1), article 10 (February 2018).
@Article{Senin:2018:GID,
  author =       "Pavel Senin and Jessica Lin and Xing Wang and Tim
                 Oates and Sunil Gandhi and Arnold P. Boedihardjo and
                 Crystal Chen and Susan Frankenstein",
  title =        "{GrammarViz} 3.0: Interactive Discovery of
                 Variable-Length Time Series Patterns",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "10:1--10:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3051126",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The problems of recurrent and anomalous pattern
                 discovery in time series, e.g., motifs and discords,
                 respectively, have received a lot of attention from
                 researchers in the past decade. However, since the
                 pattern search space is usually intractable, most
                 existing detection algorithms require that the patterns
                 have discriminative characteristics and have its length
                 known in advance and provided as input, which is an
                 unreasonable requirement for many real-world problems.
                 In addition, patterns of similar structure, but of
                 different lengths may co-exist in a time series.
                 Addressing these issues, we have developed algorithms
                 for variable-length time series pattern discovery that
                 are based on symbolic discretization and grammar
                 inference --- two techniques whose combination enables the
                 structured reduction of the search space and discovery
                 of the candidate patterns in linear time. In this work,
                 we present GrammarViz 3.0 --- a software package that
                 provides implementations of proposed algorithms and
                 graphical user interface for interactive
                 variable-length time series pattern discovery. The
                 current version of the software provides an alternative
                 grammar inference algorithm that improves the time
                 series motif discovery workflow, and introduces an
                 experimental procedure for automated discretization
                 parameter selection that builds upon the minimum
                 cardinality maximum cover principle and aids the time
                 series recurrent and anomalous pattern discovery.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% Datta and Adar, TKDD 12(1), article 11 (February 2018).
%%% NOTE(review): the abstract sentence "viability of our through a
%%% study" appears to lack a noun; confirm against the publisher text.
@Article{Datta:2018:CVC,
  author = {Srayan Datta and Eytan Adar},
  title = {{CommunityDiff}: Visualizing Community Clustering
    Algorithms},
  journal = j-TKDD,
  volume = {12},
  number = {1},
  pages = {11:1--11:??},
  month = feb,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3047009},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Jan 29 17:18:45 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Community detection is an oft-used analytical function
    of network analysis but can be a black art to apply in
    practice. Grouping of related nodes is important for
    identifying patterns in network datasets but also
    notoriously sensitive to input data and algorithm
    selection. This is further complicated by the fact
    that, depending on domain and use case, the ground
    truth knowledge of the end-user can vary from none to
    complete. In this work, we present CommunityDiff, an
    interactive visualization system that combines
    visualization and active learning (AL) to support the
    end-user's analytical process. As the end-user
    interacts with the system, a continuous refinement
    process updates both the community labels and
    visualizations. CommunityDiff features a mechanism for
    visualizing ensemble spaces, weighted combinations of
    algorithm output, that can identify patterns,
    commonalities, and differences among multiple community
    detection algorithms. Among other features,
    CommunityDiff introduces an AL mechanism that visually
    indicates uncertainty about community labels to focus
    end-user attention and supporting end-user control that
    ranges from explicitly indicating the number of
    expected communities to merging and splitting
    communities. Based on this end-user input,
    CommunityDiff dynamically recalculates communities. We
    demonstrate the viability of our through a study of
    speed of end-user convergence on satisfactory community
    labels. As part of building CommunityDiff, we describe
    a design process that can be adapted to other
    Interactive Machine Learning applications.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {11},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

%%% Yang, Tang, and Li, TKDD 12(1), article 12 (February 2018).
@Article{Yang:2018:LIC,
  author = {Yang Yang and Jie Tang and Juanzi Li},
  title = {Learning to Infer Competitive Relationships in
    Heterogeneous Networks},
  journal = j-TKDD,
  volume = {12},
  number = {1},
  pages = {12:1--12:??},
  month = feb,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3051127},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Jan 29 17:18:45 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Detecting and monitoring competitors is fundamental to
    a company to stay ahead in the global market. Existing
    studies mainly focus on mining competitive
    relationships within a single data source, while
    competing information is usually distributed in
    multiple networks. How to discover the underlying
    patterns and utilize the heterogeneous knowledge to
    avoid biased aspects in this issue is a challenging
    problem. In this article, we study the problem of
    mining competitive relationships by learning across
    heterogeneous networks. We use Twitter and patent
    records as our data sources and statistically study the
    patterns behind the competitive relationships. We find
    that the two networks exhibit different but
    complementary patterns of competitions. Overall, we
    find that similar entities tend to be competitors, with
    a probability of 4 times higher than chance. On the
    other hand, in social network, we also find a 10
    minutes phenomenon: when two entities are mentioned by
    the same user within 10 minutes, the likelihood of them
    being competitors is 25 times higher than chance. Based
    on the discovered patterns, we propose a novel Topical
    Factor Graph Model. Generally, our model defines a
    latent topic layer to bridge the Twitter network and
    patent network. It then employs a semi-supervised
    learning algorithm to classify the relationships
    between entities (e.g., companies or products). We test
    the proposed model on two real data sets and the
    experimental results validate the effectiveness of our
    model, with an average of +46\% improvement over
    alternative methods. Besides, we further demonstrate
    the competitive relationships inferred by our proposed
    model can be applied in the job-hopping prediction
    problem by achieving an average of +10.7\%
    improvement.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {12},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

%%% Wang et al., TKDD 12(1), article 13 (February 2018).
@Article{Wang:2018:PSM,
  author =       "Boyue Wang and Yongli Hu and Junbin Gao and Yanfeng
                 Sun and Baocai Yin",
  title =        "Partial Sum Minimization of Singular Values
                 Representation on {Grassmann} Manifolds",
  journal =      j-TKDD,
  volume =       "12",
  number =       "1",
  pages =        "13:1--13:??",
  month =        feb,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3092690",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Clustering is one of the fundamental topics in data
                 mining and pattern recognition. As a prospective
                 clustering method, the subspace clustering has made
                 considerable progress in recent researches, e.g.,
                 sparse subspace clustering (SSC) and low rank
                 representation (LRR). However, most existing subspace
                 clustering algorithms are designed for vectorial data
                 from linear spaces, thus not suitable for
                 high-dimensional data with intrinsic non-linear
                 manifold structure. For high-dimensional or manifold
                 data, few research pays attention to clustering
                 problems. The purpose of clustering on manifolds tends
                 to cluster manifold-valued data into several groups
                 according to the manifold-based similarity metric. This
                 article proposes an extended LRR model for
                 manifold-valued Grassmann data that incorporates prior
                 knowledge by minimizing partial sum of singular values
                 instead of the nuclear norm, namely Partial Sum
                 minimization of Singular Values Representation
                 (GPSSVR). The new model not only enforces the global
                 structure of data in low rank, but also retains
                 important information by minimizing only smaller
                 singular values. To further maintain the local
                 structures among Grassmann points, we also integrate
                 the Laplacian penalty with GPSSVR. The proposed model
                 and algorithms are assessed on a public human face
                 dataset, some widely used human action video datasets
                 and a real scenery dataset. The experimental results
                 show that the proposed methods obviously outperform
                 other state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% Garcia Trevino, Hameed, and Barria, TKDD 12(1), article 14
%%% (February 2018).
%%% NOTE(review): the first author's family name may be the compound
%%% "Garcia Trevino"; in the "First Last" form used here BibTeX treats
%%% only the final word as the surname --- confirm before changing.
@Article{Trevino:2018:DSE,
  author = {Edgar S. Garc{\'\i}a Trevi{\~n}o and Muhammad Zaid
    Hameed and Javier A. Barria},
  title = {Data Stream Evolution Diagnosis Using Recursive
    Wavelet Density Estimators},
  journal = j-TKDD,
  volume = {12},
  number = {1},
  pages = {14:1--14:??},
  month = feb,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3106369},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Jan 29 17:18:45 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Data streams are a new class of data that is becoming
    pervasively important in a wide range of applications,
    ranging from sensor networks, environmental monitoring
    to finance. In this article, we propose a novel
    framework for the online diagnosis of evolution of
    multidimensional streaming data that incorporates
    Recursive Wavelet Density Estimators into the context
    of Velocity Density Estimation. In the proposed
    framework changes in streaming data are characterized
    by the use of local and global evolution coefficients.
    In addition, we propose for the analysis of changes in
    the correlation structure of the data a recursive
    implementation of the Pearson correlation coefficient
    using exponential discounting. Two visualization tools,
    namely temporal and spatial velocity profiles, are
    extended in the context of the proposed framework.
    These are the three main advantages of the proposed
    method over previous approaches: (1) the memory storage
    required is minimal and independent of any window size;
    (2) it has a significantly lower computational
    complexity; and (3) it makes possible the fast
    diagnosis of data evolution at all dimensions and at
    relevant combinations of dimensions with only one pass
    of the data. With the help of the four examples, we
    show the framework's relevance in a change detection
    context and its potential capability for real world
    applications.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {14},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

%%% Kaushal and Patwardhan, TKDD 12(2), article 15 (March 2018).
@Article{Kaushal:2018:ETP,
  author =       "Vishal Kaushal and Manasi Patwardhan",
  title =        "Emerging Trends in Personality Identification Using
                 Online Social Networks --- a Literature Survey",
  journal =      j-TKDD,
  volume =       "12",
  number =       "2",
  pages =        "15:1--15:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3070645",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Personality is a combination of all the
                 attributes --- behavioral, temperamental, emotional, and
                 mental --- that characterizes a unique individual. Ability
                 to identify personalities of people has always been of
                 great interest to the researchers due to its
                 importance. It continues to find highly useful
                 applications in many domains. Owing to the increasing
                 popularity of online social networks, researchers have
                 started looking into the possibility of predicting a
                 user's personality from his online social networking
                 profile, which serves as a rich source of textual as
                 well as non-textual content published by users. In the
                 process of creating social networking profiles, users
                 reveal a lot about themselves both in what they share
                 and how they say it. Studies suggest that the online
                 social networking websites are, in fact, a relevant and
                 valid means of communicating personality. In this
                 article, we review these various studies reported in
                 literature toward identification of personality using
                 online social networks. To the best of our knowledge,
                 this is the first reported survey of its kind at the
                 time of submission. We hope that our contribution,
                 especially in summarizing the previous findings and in
                 identifying the directions for future research in this
                 area, would encourage researchers to do more work in
                 this budding area.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% Pandove, Goel, and Rani, TKDD 12(2), article 16 (March 2018).
%%% NOTE(review): second and third author given names are spelled
%%% "Shivani" and "Rinkle" here; verify against the publisher record
%%% for the DOI below.
@Article{Pandove:2018:SRC,
  author =       "Divya Pandove and Shivani Goel and Rinkle Rani",
  title =        "Systematic Review of Clustering High-Dimensional and
                 Large Datasets",
  journal =      j-TKDD,
  volume =       "12",
  number =       "2",
  pages =        "16:1--16:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3132088",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Technological advancement has enabled us to store and
                 process huge amount of data in relatively short spans
                 of time. The nature of data is rapidly changing,
                 particularly its dimensionality is more commonly multi-
                 and high-dimensional. There is an immediate need to
                 expand our focus to include analysis of
                 high-dimensional and large datasets. Data analysis is
                 becoming a mammoth task, due to incremental increase in
                 data volume and complexity in terms of heterogony of
                 data. It is due to this dynamic computing environment
                 that the existing techniques either need to be modified
                 or discarded to handle new data in multiple
                 high-dimensions. Data clustering is a tool that is used
                 in many disciplines, including data mining, so that
                 meaningful knowledge can be extracted from seemingly
                 unstructured data. The aim of this article is to
                 understand the problem of clustering and various
                 approaches addressing this problem. This article
                 discusses the process of clustering from both
                 microviews (data treating) and macroviews (overall
                 clustering process). Different distance and similarity
                 measures, which form the cornerstone of effective data
                 clustering, are also identified. Further, an in-depth
                 analysis of different clustering approaches focused on
                 data mining, dealing with large-scale datasets is
                 given. These approaches are comprehensively compared to
                 bring out a clear differentiation among them. This
                 article also surveys the problem of high-dimensional
                 data and the existing approaches, that makes it more
                 relevant. It also explores the latest trends in cluster
                 analysis, and the real-life applications of this
                 concept. This survey is exhaustive as it tries to cover
                 all the aspects of clustering in the field of data
                 mining.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% Li et al., TKDD 12(2), article 17 (March 2018).
@Article{Li:2018:LSC,
  author = {Yixuan Li and Kun He and Kyle Kloster and David Bindel
    and John Hopcroft},
  title = {Local Spectral Clustering for Overlapping Community
    Detection},
  journal = j-TKDD,
  volume = {12},
  number = {2},
  pages = {17:1--17:??},
  month = mar,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3106370},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Jan 29 17:18:45 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Large graphs arise in a number of contexts and
    understanding their structure and extracting
    information from them is an important research area.
    Early algorithms for mining communities have focused on
    global graph structure, and often run in time
    proportional to the size of the entire graph. As we
    explore networks with millions of vertices and find
    communities of size in the hundreds, it becomes
    important to shift our attention from macroscopic
    structure to microscopic structure in large networks. A
    growing body of work has been adopting local expansion
    methods in order to identify communities from a few
    exemplary seed members. In this article, we propose a
    novel approach for finding overlapping communities
    called Lemon (Local Expansion via Minimum One Norm).
    Provided with a few known seeds, the algorithm finds
    the community by performing a local spectral diffusion.
    The core idea of Lemon is to use short random walks to
    approximate an invariant subspace near a seed set,
    which we refer to as local spectra. Local spectra can
    be viewed as the low-dimensional embedding that
    captures the nodes' closeness in the local network
    structure. We show that Lemon's performance in
    detecting communities is competitive with
    state-of-the-art methods. Moreover, the running time
    scales with the size of the community rather than that
    of the entire graph. The algorithm is easy to implement
    and is highly parallelizable. We further provide
    theoretical analysis of the local spectral properties,
    bounding the measure of tightness of extracted
    community using the eigenvalues of graph Laplacian. We
    thoroughly evaluate our approach using both synthetic
    and real-world datasets across different domains, and
    analyze the empirical variations when applying our
    method to inherently different networks in practice. In
    addition, the heuristics on how the seed set quality
    and quantity would affect the performance are
    provided.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {17},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

%%% Costa and Ortale, TKDD 12(2), article 18 (March 2018).
@Article{Costa:2018:MOC,
  author = {Gianni Costa and Riccardo Ortale},
  title = {Mining Overlapping Communities and Inner Role
    Assignments through {Bayesian} Mixed-Membership Models
    of Networks with Context-Dependent Interactions},
  journal = j-TKDD,
  volume = {12},
  number = {2},
  pages = {18:1--18:??},
  month = mar,
  year = {2018},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/3106368},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  bibdate = {Tue Jan 29 17:18:45 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Community discovery and role assignment have been
    recently integrated into an unsupervised approach for
    the exploratory analysis of overlapping communities and
    inner roles in networks. However, the formation of ties
    in these prototypical research efforts is not truly
    realistic, since it does not account for a fundamental
    aspect of link establishment in real-world networks,
    i.e., the explicative reasons that cause interactions
    among nodes. Such reasons can be interpreted as generic
    requirements of nodes, that are met by other nodes and
    essentially pertain both to the nodes themselves and to
    their interaction contexts (i.e., the respective
    communities and roles). In this article, we present two
    new model-based machine-learning approaches, wherein
    community discovery and role assignment are seamlessly
    integrated and simultaneously performed through
    approximate posterior inference in Bayesian
    mixed-membership models of directed networks. The
    devised models account for the explicative reasons
    governing link establishment in terms of node-specific
    and contextual latent interaction factors. The former
    are inherently characteristic of nodes, while the
    latter are characterizations of nodes in the context of
    the individual communities and roles. The generative
    process of both models assigns nodes to communities
    with respective roles and connects them through
    directed links, which are probabilistically governed by
    their node-specific and contextual interaction factors.
    The difference between the proposed models lies in the
    exploitation of the contextual interaction factors.
    More precisely, in one model, the contextual
    interaction factors have the same impact on link
    generation. In the other model, the contextual
    interaction factors are weighted by the extent of
    involvement of the linked nodes in the respective
    communities and roles. We develop MCMC algorithms
    implementing approximate posterior inference and
    parameter estimation within our models. Finally, we
    conduct an intensive comparative experimentation, which
    demonstrates their superiority in community compactness
    and link prediction on various real-world and synthetic
    networks.},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {18},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-URL = {https://dl.acm.org/loi/tkdd},
}

@Article{Long:2018:PMS,
  author =       "Cheng Long and Raymond Chi-Wing Wong and Victor Junqiu
                 Wei",
  title =        "Profit Maximization with Sufficient Customer
                 Satisfactions",
  journal =      j-TKDD,
  volume =       "12",
  number =       "2",
  pages =        "19:1--19:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3110216",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In many commercial campaigns, we observe that there
                 exists a tradeoff between the number of customers
                 satisfied by the company and the profit gained. Merely
                 satisfying as many customers as possible or maximizing
                 the profit is not desirable. To this end, in this
                 article, we propose a new problem called
                 $k$-Satisfiability Assignment for Maximizing the Profit
                 ($k$-SAMP), where $k$ is a user parameter and a
                 non-negative integer. Given a set $P$ of products and a
                 set $O$ of customers, $k$-SAMP is to find an assignment
                 between $P$ and $O$ such that at least $k$ customers
                 are satisfied in the assignment and the profit incurred
                 by this assignment is maximized. Although we find that
                 this problem is closely related to two classic computer
                 science problems, namely maximum weight matching and
                 maximum matching, the techniques developed for these
                 classic problems cannot be adapted to our $k$-SAMP
                 problem. In this work, we design a novel algorithm
                 called Adjust for the $k$-SAMP problem. Given an
                 assignment $A$, Adjust iteratively increases the profit
                 of $A$ by adjusting some appropriate matches in $A$
                 while keeping at least $k$ customers satisfied in $A$.
                 We prove that Adjust returns a global optimum.
                 Extensive experiments were conducted that verified the
                 efficiency of Adjust.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ramezani:2018:CDU,
  author =       "Maryam Ramezani and Ali Khodadadi and Hamid R.
                 Rabiee",
  title =        "Community Detection Using Diffusion Information",
  journal =      j-TKDD,
  volume =       "12",
  number =       "2",
  pages =        "20:1--20:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3110215",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Community detection in social networks has become a
                 popular topic of research during the last decade. There
                 exist a variety of algorithms for modularizing the
                 network graph into different communities. However, they
                 mostly assume that partial or complete information of
                 the network graphs are available that is not feasible
                 in many cases. In this article, we focus on detecting
                 communities by exploiting their diffusion information.
                 To this end, we utilize the Conditional Random Fields
                 (CRF) to discover the community structures. The
                 proposed method, community diffusion (CoDi), does not
                 require any prior knowledge about the network structure
                 or specific properties of communities. Furthermore, in
                 contrast to the structure-based community detection
                 methods, this method is able to identify the hidden
                 communities. The experimental results indicate
                 considerable improvements in detecting communities
                 based on accuracy, scalability, and real cascade
                 information measures.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chiasserini:2018:ACS,
  author =       "Carla-Fabiana Chiasserini and Michele Garetto and Emilio
                 Leonardi",
  title =        "De-anonymizing Clustered Social Networks by
                 Percolation Graph Matching",
  journal =      j-TKDD,
  volume =       "12",
  number =       "2",
  pages =        "21:1--21:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3127876",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Online social networks offer the opportunity to
                 collect a huge amount of valuable information about
                 billions of users. The analysis of this data by service
                 providers and unintended third parties are posing
                 serious threats to user privacy. In particular, recent
                 work has shown that users participating in more than
                 one online social network can be identified based only
                 on the structure of their links to other users. An
                 effective tool to de-anonymize social network users is
                 represented by graph matching algorithms. Indeed, by
                 exploiting a sufficiently large set of seed nodes, a
                 percolation process can correctly match almost all
                 nodes across the different social networks. In this
                 article, we show the crucial role of clustering, which
                 is a relevant feature of social network graphs (and
                 many other systems). Clustering has both the effect of
                 making matching algorithms more prone to errors, and
                 the potential to greatly reduce the number of seeds
                 needed to trigger percolation. We show these facts by
                 considering a fairly general class of random geometric
                 graphs with variable clustering level. We assume that
                 seeds can be identified in particular sub-regions of
                 the network graph, while no a priori knowledge about
                 the location of the other nodes is required. Under
                 these conditions, we show how clever algorithms can
                 achieve surprisingly good performance while limiting
                 the number of matching errors.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "21",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhao:2018:JRL,
  author =       "Wayne Xin Zhao and Feifan Fan and Ji-Rong Wen and
                 Edward Y. Chang",
  title =        "Joint Representation Learning for Location-Based
                 Social Networks with Multi-Grained Sequential
                 Contexts",
  journal =      j-TKDD,
  volume =       "12",
  number =       "2",
  pages =        "22:1--22:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3127875",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This article studies the problem of learning effective
                 representations for Location-Based Social Networks
                 (LBSN), which is useful in many tasks such as location
                 recommendation and link prediction. Existing network
                 embedding methods mainly focus on capturing topology
                 patterns reflected in social connections, while
                 check-in sequences, the most important data type in
                 LBSNs, are not directly modeled by these models. In
                 this article, we propose a representation learning
                 method for LBSNs called as JRLM++, which models
                 check-in sequences together with social connections. To
                 capture sequential relatedness, JRLM++ characterizes
                 two levels of sequential contexts, namely fine-grained
                 and coarse-grained contexts. We present a learning
                 algorithm tailored to the hierarchical architecture of
                 the proposed model. We conduct extensive experiments on
                 two important applications using real-world datasets.
                 The experimental results demonstrate the superiority of
                 our model. The proposed model can generate effective
                 representations for both users and locations in the
                 same embedding space, which can be further utilized to
                 improve multiple LBSN tasks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "22",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Hu:2018:CFT,
  author =       "Guang-Neng Hu and Xin-Yu Dai and Feng-Yu Qiu and Rui
                 Xia and Tao Li and Shu-Jian Huang and Jia-Jun Chen",
  title =        "Collaborative Filtering with Topic and Social Latent
                 Factors Incorporating Implicit Feedback",
  journal =      j-TKDD,
  volume =       "12",
  number =       "2",
  pages =        "23:1--23:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3127873",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Recommender systems (RSs) provide an effective way of
                 alleviating the information overload problem by
                 selecting personalized items for different users.
                 Latent factors-based collaborative filtering (CF) has
                 become the popular approaches for RSs due to its
                 accuracy and scalability. Recently, online social
                 networks and user-generated content provide diverse
                 sources for recommendation beyond ratings. Although
                 social matrix factorization (Social MF) and topic
                 matrix factorization (Topic MF) successfully exploit
                 social relations and item reviews, respectively; both
                 of them ignore some useful information. In this
                 article, we investigate the effective data fusion by
                 combining the aforementioned approaches. First, we
                 propose a novel model MR3 to jointly model three
                 sources of information (i.e., ratings, item reviews,
                 and social relations) effectively for rating prediction
                 by aligning the latent factors and hidden topics.
                 Second, we incorporate the implicit feedback from
                 ratings into the proposed model to enhance its
                 capability and to demonstrate its flexibility. We
                 achieve more accurate rating prediction on real-life
                 datasets over various state-of-the-art methods.
                 Furthermore, we measure the contribution from each of
                 the three data sources and the impact of implicit
                 feedback from ratings, followed by the sensitivity
                 analysis of hyperparameters. Empirical studies
                 demonstrate the effectiveness and efficacy of our
                 proposed model and its extension.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "23",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Perozzi:2018:DCA,
  author =       "Bryan Perozzi and Leman Akoglu",
  title =        "Discovering Communities and Anomalies in Attributed
                 Graphs: Interactive Visual Exploration and
                 Summarization",
  journal =      j-TKDD,
  volume =       "12",
  number =       "2",
  pages =        "24:1--24:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3139241",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a network with node attributes, how can we
                 identify communities and spot anomalies? How can we
                 characterize, describe, or summarize the network in a
                 succinct way? Community extraction requires a measure
                 of quality for connected subgraphs (e.g., social
                 circles). Existing subgraph measures, however, either
                 consider only the connectedness of nodes inside the
                 community and ignore the cross-edges at the boundary
                 (e.g., density) or only quantify the structure of the
                 community and ignore the node attributes (e.g.,
                 conductance). In this work, we focus on node-attributed
                 networks and introduce: (1) a new measure of subgraph
                 quality for attributed communities called normality,
                 (2) a community extraction algorithm that uses
                 normality to extract communities and a few
                 characterizing attributes per community, and (3) a
                 summarization and interactive visualization approach
                 for attributed graph exploration. More specifically,
                 (1) we first introduce a new measure to quantify the
                 normality of an attributed subgraph. Our normality
                 measure carefully utilizes structure and attributes
                 together to quantify both the internal consistency and
                 external separability. We then formulate an objective
                 function to automatically infer a few attributes
                 (called the ``focus'') and respective attribute
                 weights, so as to maximize the normality score of a
                 given subgraph. Most notably, unlike many other
                 approaches, our measure allows for many cross-edges as
                 long as they can be ``exonerated;'' i.e., either (i)
                 are expected under a null graph model, and/or (ii)
                 their boundary nodes do not exhibit the focus
                 attributes. Next, (2) we propose AMEN (for Attributed
                 Mining of Entity Networks), an algorithm that
                 simultaneously discovers the communities and their
                 respective focus in a given graph, with a goal to
                 maximize the total normality. Communities for which a
                 focus that yields high normality cannot be found are
                 considered low quality or anomalous. Last, (3) we
                 formulate a summarization task with a multi-criteria
                 objective, which selects a subset of the communities
                 that (i) cover the entire graph well, are (ii) high
                 quality and (iii) diverse in their focus attributes. We
                 further design an interactive visualization interface
                 that presents the communities to a user in an
                 interpretable, user-friendly fashion. The user can
                 explore all the communities, analyze various
                 algorithm-generated summaries, as well as devise their
                 own summaries interactively to characterize the network
                 in a succinct way. As the experiments on real-world
                 attributed graphs show, our proposed approaches
                 effectively find anomalous communities and outperform
                 several existing measures and methods, such as
                 conductance, density, OddBall, and SODA. We also
                 conduct extensive user studies to measure the
                 capability and efficiency that our approach provides to
                 the users toward network summarization, exploration,
                 and sensemaking.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "24",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bonab:2018:GGO,
  author =       "Hamed R. Bonab and Fazli Can",
  title =        "{GOOWE}: Geometrically Optimum and Online-Weighted
                 Ensemble Classifier for Evolving Data Streams",
  journal =      j-TKDD,
  volume =       "12",
  number =       "2",
  pages =        "25:1--25:??",
  month =        mar,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3139240",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:45 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Designing adaptive classifiers for an evolving data
                 stream is a challenging task due to the data size and
                 its dynamically changing nature. Combining individual
                 classifiers in an online setting, the ensemble
                 approach, is a well-known solution. It is possible that
                 a subset of classifiers in the ensemble outperforms
                 others in a time-varying fashion. However, optimum
                 weight assignment for component classifiers is a
                 problem, which is not yet fully addressed in online
                 evolving environments. We propose a novel data stream
                 ensemble classifier, called Geometrically Optimum and
                 Online-Weighted Ensemble (GOOWE), which assigns optimum
                 weights to the component classifiers using a sliding
                 window containing the most recent data instances. We
                 map vote scores of individual classifiers and true
                 class labels into a spatial environment. Based on the
                 Euclidean distance between vote scores and
                 ideal-points, and using the linear least squares (LSQ)
                 solution, we present a novel, dynamic, and online
                 weighting approach. While LSQ is used for batch mode
                 ensemble classifiers, it is the first time that we
                 adapt and use it for online environments by providing a
                 spatial modeling of online ensembles. In order to show
                 the robustness of the proposed algorithm, we use
                 real-world datasets and synthetic data generators using
                 the Massive Online Analysis (MOA) libraries. First, we
                 analyze the impact of our weighting system on
                 prediction accuracy through two scenarios. Second, we
                 compare GOOWE with eight state-of-the-art ensemble
                 classifiers in a comprehensive experimental
                 environment. Our experiments show that GOOWE provides
                 improved reactions to different types of concept drift
                 compared to our baselines. The statistical tests
                 indicate a significant improvement in accuracy, with
                 conservative time and memory requirements.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "25",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xie:2018:ERP,
  author =       "Hong Xie and Richard T. B. Ma and John C. S. Lui",
  title =        "Enhancing Reputation via Price Discounts in E-Commerce
                 Systems: a Data-Driven Approach",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "26:1--26:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154417",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Reputation systems have become an indispensable
                 component of modern E-commerce systems, as they help
                 buyers make informed decisions in choosing trustworthy
                 sellers. To attract buyers and increase the transaction
                 volume, sellers need to earn reasonably high reputation
                 scores. This process usually takes a substantial amount
                 of time. To accelerate this process, sellers can
                 provide price discounts to attract users, but the
                 underlying difficulty is that sellers have no prior
                 knowledge on buyers' preferences over price discounts.
                 In this article, we develop an online algorithm to
                 infer the optimal discount rate from data. We first
                 formulate an optimization framework to select the
                 optimal discount rate given buyers' discount
                 preferences, which is a tradeoff between the short-term
                 profit and the ramp-up time (for reputation). We then
                 derive the closed-form optimal discount rate, which
                 gives us key insights in applying a stochastic bandits
                 framework to infer the optimal discount rate from the
                 transaction data with regret upper bounds. We show that
                 the computational complexity of evaluating the
                 performance metrics is infeasibly high, and therefore,
                 we develop efficient randomized algorithms with
                 guaranteed performance to approximate them. Finally, we
                 conduct experiments on a dataset crawled from eBay.
                 Experimental results show that our framework can trade
                 60\% of the short-term profit for reducing the ramp-up
                 time by 40\%. This reduction in the ramp-up time can
                 increase the long-term profit of a seller by at least
                 20\%.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "26",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Belcastro:2018:GRA,
  author =       "Loris Belcastro and Fabrizio Marozzo and Domenico
                 Talia and Paolo Trunfio",
  title =        "{G-RoI}: Automatic Region-of-Interest Detection Driven
                 by Geotagged Social Media Data",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "27:1--27:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154411",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Geotagged data gathered from social media can be used
                 to discover interesting locations visited by users
                 called Places-of-Interest (PoIs). Since a PoI is
                 generally identified by the geographical coordinates of
                 a single point, it is hard to match it with user
                 trajectories. Therefore, it is useful to define an
                 area, called Region-of-Interest (RoI), to represent
                 the boundaries of the PoI's area. RoI mining techniques
                 are aimed at discovering RoIs from PoIs and other data.
                 Existing RoI mining techniques are based on three main
                 approaches: predefined shapes, density-based
                 clustering, and grid-based aggregation. This article
                 proposes G-RoI, a novel RoI mining technique that
                 exploits the indications contained in geotagged social
                 media items to discover RoIs with a high accuracy.
                 Experiments performed over a set of PoIs in Rome and
                 Paris using social media geotagged data, demonstrate
                 that G-RoI in most cases achieves better results than
                 existing techniques. In particular, the mean F$_1$
                 score is 0.34 higher than that obtained with the
                 well-known DBSCAN algorithm in Rome RoIs and 0.23
                 higher in Paris RoIs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "27",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Shin:2018:FAF,
  author =       "Kijung Shin and Bryan Hooi and Christos Faloutsos",
  title =        "Fast, Accurate, and Flexible Algorithms for Dense
                 Subtensor Mining",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "28:1--28:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154414",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a large-scale and high-order tensor, how can we
                 detect dense subtensors in it? Can we spot them in
                 near-linear time but with quality guarantees? Extensive
                 previous work has shown that dense subtensors, as well
                 as dense subgraphs, indicate anomalous or fraudulent
                 behavior (e.g., lockstep behavior in social networks).
                 However, available algorithms for detecting dense
                 subtensors are not satisfactory in terms of speed,
                 accuracy, and flexibility. In this work, we propose two
                 algorithms, called M-Zoom and M-Biz, for fast and
                 accurate dense-subtensor detection with various density
                 measures. M-Zoom gives a lower bound on the density of
                 detected subtensors, while M-Biz guarantees the local
                 optimality of detected subtensors. M-Zoom and M-Biz can
                 be combined, giving the following advantages: (1)
                 Scalable: scale near-linearly with all aspects of
                 tensors and are up to 114$ \times $ faster than
                 state-of-the-art methods with similar accuracy, (2)
                 Provably accurate: provide a guarantee on the lowest
                 density and local optimality of the subtensors they
                 find, (3) Flexible: support multi-subtensor detection
                 and size bounds as well as diverse density measures,
                 and (4) Effective: successfully detected edit wars and
                 bot activities in Wikipedia, and spotted network
                 attacks from a TCP dump with near-perfect accuracy (AUC
                 = 0.98).",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liang:2018:PRA,
  author =       "Jiongqian Liang and Deepak Ajwani and Patrick K.
                 Nicholson and Alessandra Sala and Srinivasan
                 Parthasarathy",
  title =        "Prioritized Relationship Analysis in Heterogeneous
                 Information Networks",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "29:1--29:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154401",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "An increasing number of applications are modeled and
                 analyzed in network form, where nodes represent
                 entities of interest and edges represent interactions
                 or relationships between entities. Commonly, such
                 relationship analysis tools assume homogeneity in both
                 node type and edge type. Recent research has sought to
                 redress the assumption of homogeneity and focused on
                 mining heterogeneous information networks (HINs) where
                 both nodes and edges can be of different types.
                 Building on such efforts, in this work, we articulate a
                 novel approach for mining relationships across entities
                 in such networks while accounting for user preference
                 over relationship type and interestingness metric. We
                 formalize the problem as a top-$k$ lightest paths
                 problem, contextualized in a real-world communication
                 network, and seek to find the $k$ most interesting path
                 instances matching the preferred relationship type. Our
                 solution, PROphetic HEuristic Algorithm for Path
                 Searching (PRO-HEAPS), leverages a combination of novel
                 graph preprocessing techniques, well-designed
                 heuristics and the venerable $ A^* $ search algorithm.
                 We run our algorithm on real-world large-scale graphs
                 and show that our algorithm significantly outperforms a
                 wide variety of baseline approaches with speedups as
                 large as 100X. To widen the range of applications, we
                 also extend PRO-HEAPS to (i) support relationship
                 analysis between two groups of entities and (ii) allow
                 pattern path in the query to contain logical statements
                 with operators AND, OR, NOT, and wild-card ``.''. We
                 run experiments using this generalized version of
                 PRO-HEAPS and demonstrate that the advantage of
                 PRO-HEAPS becomes even more pronounced for these
                 general cases. Furthermore, we conduct a comprehensive
                 analysis to study how the performance of PRO-HEAPS
                 varies with respect to various attributes of the input
                 HIN. We finally conduct a case study to demonstrate
                 valuable applications of our algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "29",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Huang:2018:WTC,
  author =       "Hong Huang and Yuxiao Dong and Jie Tang and Hongxia
                 Yang and Nitesh V. Chawla and Xiaoming Fu",
  title =        "Will Triadic Closure Strengthen Ties in Social
                 Networks?",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "30:1--30:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154399",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The social triad---a group of three people---is one of
                 the simplest and most fundamental social groups. Extensive
                 network and social theories have been developed to
                 understand its structure, such as triadic closure and
                 social balance. Over the course of a triadic
                 closure---the transition from two ties to three among
                 three users, the strength dynamics of its social ties,
                 however, are much less well understood. Using two
                 dynamic networks from social media and mobile
                 communication, we examine how the formation of the
                 third tie in a triad affects the strength of the
                 existing two ties. Surprisingly, we find that in about
                 80\% social triads, the strength of the first two ties
                 is weakened although averagely the tie strength in the
                 two networks maintains an increasing or stable trend.
                 We discover that (1) the decrease in tie strength among
                 three males is more sharply than that among females,
                 and (2) the tie strength between celebrities is more
                 likely to be weakened as the closure of a triad than
                 those between ordinary people. Furthermore, we
                 formalize a triadic tie strength dynamics prediction
                 problem to infer whether social ties of a triad will
                 become weakened after its closure. We propose a TRIST
                 method---a kernel density estimation (KDE)-based
                 graphical model---to solve the problem by incorporating
                 user demographics, temporal effects, and structural
                 information. Extensive experiments demonstrate that
                 TRIST offers a greater than 82\% potential
                 predictability for inferring triadic tie strength
                 dynamics in both networks. The leveraging of the KDE
                 and structural correlations enables TRIST to outperform
                 baselines by up to 30\% in terms of F1-score.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "30",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2018:LSB,
  author =       "Guangyong Chen and Fengyuan Zhu and Pheng Ann Heng",
  title =        "Large-Scale {Bayesian} Probabilistic Matrix
                 Factorization with Memo-Free Distributed Variational
                 Inference",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3161886",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Bayesian Probabilistic Matrix Factorization (BPMF) is
                 a powerful model in many dyadic data prediction
                 problems, especially the applications of Recommender
                 system. However, its poor scalability has limited its
                 wide applications on massive data. Based on the
                 conditional independence property of observed entries
                 in BPMF model, we propose a novel distributed memo-free
                 variational inference method for large-scale matrix
                 factorization problems. Compared with the
                 state-of-the-art methods, the proposed method is
                 favored for several attractive properties.
                 Specifically, it does not require tuning of learning
                 rate carefully, shuffling the training set at each
                 iteration, or storing massive redundant variables, and
                 can introduce new agents into the computations on the
                 fly. We conduct extensive experiments on both synthetic
                 and real-world datasets. The experimental results show
                 that our method can converge significantly faster with
                 better prediction performance than alternative
                 algorithms.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "31",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2018:MVL,
  author =       "Sheng Li and Ming Shao and Yun Fu",
  title =        "Multi-View Low-Rank Analysis with Applications to
                 Outlier Detection",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "32:1--32:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3168363",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Detecting outliers or anomalies is a fundamental
                 problem in various machine learning and data mining
                 applications. Conventional outlier detection algorithms
                 are mainly designed for single-view data. Nowadays,
                 data can be easily collected from multiple views, and
                 many learning tasks such as clustering and
                 classification have benefited from multi-view data.
                 However, outlier detection from multi-view data is
                 still a very challenging problem, as the data in
                 multiple views usually have more complicated
                 distributions and exhibit inconsistent behaviors. To
                 address this problem, we propose a multi-view low-rank
                 analysis (MLRA) framework for outlier detection in this
                 article. MLRA pursuits outliers from a new perspective,
                 robust data representation. It contains two major
                 components. First, the cross-view low-rank coding is
                 performed to reveal the intrinsic structures of data.
                 In particular, we formulate a regularized
                 rank-minimization problem, which is solved by an
                 efficient optimization algorithm. Second, the outliers
                 are identified through an outlier score estimation
                 procedure. Different from the existing multi-view
                 outlier detection methods, MLRA is able to detect two
                 different types of outliers from multiple views
                 simultaneously. To this end, we design a criterion to
                 estimate the outlier scores by analyzing the obtained
                 representation coefficients. Moreover, we extend MLRA
                 to tackle the multi-view group outlier detection
                 problem. Extensive evaluations on seven UCI datasets,
                 the MovieLens, the USPS-MNIST, and the WebKB datasets
                 demonstrate that our approach outperforms several
                 state-of-the-art outlier detection methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "32",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Altowim:2018:PAP,
  author =       "Yasser Altowim and Dmitri V. Kalashnikov and Sharad
                 Mehrotra",
  title =        "{ProgressER}: Adaptive Progressive Approach to
                 Relational Entity Resolution",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3154410",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Entity resolution (ER) is the process of identifying
                 which entities in a dataset refer to the same
                 real-world object. In relational ER, the dataset
                 consists of multiple entity-sets and relationships
                 among them. Such relationships cause the resolution of
                 some entities to influence the resolution of other
                 entities. For instance, consider a relational dataset
                 that consists of a set of research paper entities and a
                 set of venue entities. In such a dataset, deciding that
                 two research papers are the same may trigger the fact
                 that their venues are also the same. This article
                 proposes a progressive approach to relational ER, named
                 ProgressER, that aims to produce the highest quality
                 result given a constraint on the resolution budget,
                 specified by the user. Such a progressive approach is
                 useful for many emerging analytical applications that
                 require low latency response (and thus cannot tolerate
                 delays caused by cleaning the entire dataset) and/or in
                 situations where the underlying resources are
                 constrained or costly to use. To maximize the quality
                 of the result, ProgressER follows an adaptive strategy
                 that periodically monitors and reassesses the
                 resolution progress to determine which parts of the
                 dataset should be resolved next and how they should be
                 resolved. More specifically, ProgressER divides the
                 input budget into several resolution windows and
                 analyzes the resolution progress at the beginning of
                 each window to generate a resolution plan for the
                 current window. A resolution plan specifies which
                 blocks of entities and which entity pairs within blocks
                 need to be resolved during the plan execution phase of
                 that window. In addition, ProgressER specifies, for
                 each identified pair of entities, the order in which
                 the similarity functions should be applied on the pair.
                 Such an order plays a significant role in reducing the
                 overall cost because applying the first few functions
                 in this order might be sufficient to resolve the pair.
                 The empirical evaluation of ProgressER demonstrates its
                 significant advantage in terms of progressiveness over
                 the traditional ER techniques for the given problem
                 settings.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "33",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bakerman:2018:TGH,
  author =       "Jordan Bakerman and Karl Pazdernik and Alyson Wilson
                 and Geoffrey Fairchild and Rian Bahran",
  title =        "{Twitter} Geolocation: a Hybrid Approach",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "34:1--34:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3178112",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Geotagging Twitter messages is an important tool for
                 event detection and enrichment. Despite the
                 availability of both social media content and user
                 network information, these two features are generally
                 utilized separately in the methodology. In this
                 article, we create a hybrid method that uses Twitter
                 content and network information jointly as model
                 features. We use Gaussian mixture models to map the raw
                 spatial distribution of the model features to a
                 predicted field. This approach is scalable to large
                 datasets and provides a natural representation of model
                 confidence. Our method is tested against other
                 approaches and we achieve greater prediction accuracy.
                 The model also improves both precision and coverage.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "34",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Moreno:2018:TKP,
  author =       "Sebastian Moreno and Jennifer Neville and Sergey
                 Kirshner",
  title =        "Tied {Kronecker} Product Graph Models to Capture
                 Variance in Network Populations",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3161885",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Much of the past work on mining and modeling networks
                 has focused on understanding the observed properties of
                 single example graphs. However, in many real-life
                 applications it is important to characterize the
                 structure of populations of graphs. In this work, we
                 analyze the distributional properties of probabilistic
                 generative graph models (PGGMs) for network
                 populations. PGGMs are statistical methods that model
                 the network distribution and match common
                 characteristics of real-world networks. Specifically,
                 we show that most PGGMs cannot reflect the natural
                 variability in graph properties observed across
                 multiple networks because their edge generation process
                 assumes independence among edges. Then, we propose the
                 mixed Kronecker Product Graph Model (mKPGM), a scalable
                 generalization of KPGMs that uses tied parameters to
                 increase the variability of the sampled networks, while
                 preserving the edge probabilities in expectation. We
                 compare mKPGM to several other graph models. The
                 results show that learned mKPGMs accurately represent
                 the characteristics of real-world networks, while also
                 effectively capturing the natural variability in
                 network structure.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "35",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2018:FFR,
  author =       "Pei Yang and Qi Tan and Jingrui He",
  title =        "Function-on-Function Regression with Mode-Sparsity
                 Regularization",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3178113",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Functional data is ubiquitous in many domains, such as
                 healthcare, social media, manufacturing process, sensor
                 networks, and so on. The goal of function-on-function
                 regression is to build a mapping from functional
                 predictors to functional response. In this article, we
                 propose a novel function-on-function regression model
                 based on mode-sparsity regularization. The main idea is
                 to represent the regression coefficient function
                 between predictor and response as the double expansion
                 of basis functions, and then use a mode-sparsity
                 regularization to automatically filter out irrelevant
                 basis functions for both predictors and responses. The
                 proposed approach is further extended to the tensor
                 version to accommodate multiple functional predictors.
                 While allowing the dimensionality of the regression
                 weight matrix or tensor to be relatively large, the
                 mode-sparsity regularized model facilitates the
                 multi-way shrinking of basis functions for each mode.
                 The proposed mode-sparsity regularization covers a wide
                 spectrum of sparse models for function-on-function
                 regression. The resulting optimization problem is
                 challenging due to the non-smooth property of the
                 mode-sparsity regularization. We develop an efficient
                 algorithm to solve the problem, which works in an
                 iterative update fashion, and converges to the global
                 optimum. Furthermore, we analyze the generalization
                 performance of the proposed method and derive an upper
                 bound for the consistency between the recovered
                 function and the underlying true function. The
                 effectiveness of the proposed approach is verified on
                 benchmark functional datasets in various domains.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "36",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Khodadadi:2018:CTU,
  author =       "Ali Khodadadi and Seyed Abbas Hosseini and Erfan
                 Tavakoli and Hamid R. Rabiee",
  title =        "Continuous-Time User Modeling in Presence of Badges: a
                 Probabilistic Approach",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "37:1--37:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3162050",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "User modeling plays an important role in delivering
                 customized web services to the users and improving
                 their engagement. However, most user models in the
                 literature do not explicitly consider the temporal
                 behavior of users. More recently, continuous-time user
                 modeling has gained considerable attention and many
                 user behavior models have been proposed based on
                 temporal point processes. However, typical point
                 process-based models often considered the impact of
                 peer influence and content on the user participation
                 and neglected other factors. Gamification elements are
                 among those factors that are neglected, while they have
                 a strong impact on user participation in online
                 services. In this article, we propose interdependent
                 multi-dimensional temporal point processes that capture
                 the impact of badges on user participation besides the
                 peer influence and content factors. We extend the
                 proposed processes to model user actions over the
                 community-based question and answering websites, and
                 propose an inference algorithm based on
                 Variational-Expectation Maximization that can
                 efficiently learn the model parameters. Extensive
                 experiments on both synthetic and real data gathered
                 from Stack Overflow show that our inference algorithm
                 learns the parameters efficiently and the proposed
                 method can better predict the user behavior compared to
                 the alternatives.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "37",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Peng:2018:MEO,
  author =       "Min Peng and Jiahui Zhu and Hua Wang and Xuhui Li and
                 Yanchun Zhang and Xiuzhen Zhang and Gang Tian",
  title =        "Mining Event-Oriented Topics in Microblog Stream with
                 Unsupervised Multi-View Hierarchical Embedding",
  journal =      j-TKDD,
  volume =       "12",
  number =       "3",
  pages =        "38:1--38:??",
  month =        apr,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3173044",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "This article presents an unsupervised multi-view
                 hierarchical embedding (UMHE) framework to sufficiently
                 reveal the intrinsic topical knowledge in social
                 events. Event-oriented topics are highly related to
                 such events as it can provide explicit descriptions of
                 what have happened in social community. In many
                 real-world cases, however, it is difficult to include
                 all attributes of microblogs, more often, textual
                 aspects only are available. Traditional topic modelling
                 methods have failed to generate event-oriented topics
                 with the textual aspects, since the inherent relations
                 between topics are often overlooked in these methods.
                 Meanwhile, the metrics in original word vocabulary
                 space might not effectively capture semantic distances.
                 Our UMHE framework overcomes the severe information
                 deficiency and poor feature representation. The UMHE
                 first develops a multi-view Bayesian rose tree to
                 preliminarily generate prior knowledge for latent
                 topics and their relations. With such prior knowledge,
                 we design an unsupervised translation-based
                 hierarchical embedding method to make a better
                 representation of these latent topics. By applying
                 self-adaptive spectral clustering on the embedding
                 space and the original space concomitantly, we
                 eventually extract event-oriented topics in word
                 distributions to express social events. Our framework
                 is purely data-driven and unsupervised, without any
                 external knowledge. Experimental results on TREC
                 Tweets2011 dataset and Sina Weibo dataset demonstrate
                 that the UMHE framework can construct hierarchical
                 structure with high fitness, but also yield topic
                 embeddings with salient semantics; therefore, it can
                 derive event-oriented topics with meaningful
                 descriptions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Toth:2018:GDT,
  author =       "Edward Toth and Sanjay Chawla",
  title =        "{GT$ \Delta $}: Detecting Temporal Changes in Group
                 Stochastic Processes",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "39:1--39:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3183346",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Given a portfolio of stocks or a series of frames in a
                 video how do we detect significant changes in a group
                 of values for real-time applications? In this article,
                 we formalize the problem of sequentially detecting
                 temporal changes in a group of stochastic processes. As
                 a solution to this particular problem, we propose the
                 group temporal change (GT$ \Delta $) algorithm, a
                 simple yet effective technique for the sequential
                 detection of significant changes in a variety of
                 statistical properties of a group over time. Due to the
                 flexible framework of the GT$ \Delta $ algorithm, a
                 domain expert is able to select one or more statistical
                 properties that they are interested in monitoring. The
                 usefulness of our proposed algorithm is also
                 demonstrated against state-of-the-art techniques on
                 synthetically generated data as well as on two
                 real-world applications; a portfolio of healthcare
                 stocks over a 20 year period and a video monitoring the
                 activity of our Sun.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "39",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xie:2018:SNM,
  author =       "Wei Xie and Feida Zhu and Jing Xiao and Jianzong
                 Wang",
  title =        "Social Network Monitoring for Bursty Cascade
                 Detection",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "40:1--40:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3178048",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Social network services have become important and
                 efficient platforms for users to share all kinds of
                 information. The capability to monitor user-generated
                 information and detect bursts from information
                 diffusions in these social networks brings value to a
                 wide range of real-life applications, such as viral
                 marketing. However, in reality, as a third party, there
                 is always a cost for gathering information from each
                 user or so-called social network sensor. The question
                 then arises how to select a budgeted set of social
                 network sensors to form the data stream for burst
                 detection without compromising the detection
                 performance. In this article, we present a general
                 sensor selection solution for different burst detection
                 approaches. We formulate this problem as a constraint
                 satisfaction problem that has high computational
                 complexity. To reduce the computational cost, we first
                 reduce most of the constraints by making use of the
                 fact that bursty cascades are rare among the whole
                 population. We then transform the problem into a
                 Linear Programming (LP) problem. Furthermore, we use
                 the sub-gradient method instead of the standard simplex
                 method or interior-point method to solve the LP
                 problem, which makes it possible for our solution to
                 scale up to large social networks. Evaluating our
                 solution on millions of real information cascades, we
                 demonstrate both the effectiveness and efficiency of
                 our approach.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "40",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2018:MGC,
  author =       "Xiaowei Chen and John C. S. Lui",
  title =        "Mining Graphlet Counts in Online Social Networks",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "41:1--41:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3182392",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Counting subgraphs is a fundamental analysis task for
                 online social networks (OSNs). Given the sheer size and
                 restricted access of OSN, efficient computation of
                 subgraph counts is highly challenging. Although a
                 number of algorithms have been proposed to estimate the
                 relative counts of subgraphs in OSNs with restricted
                 access, there are only few works which try to solve a
                 more general problem, i.e., counting subgraph
                 frequencies. In this article, we propose an efficient
                 random walk-based framework to estimate the subgraph
                 counts. Our framework generates samples by leveraging
                 consecutive steps of the random walk as well as by
                 observing neighbors of visited nodes. Using the
                 importance sampling technique, we derive unbiased
                 estimators of the subgraph counts. To make better use
                 of the degree information of visited nodes, we also
                 design improved estimators, which increases the
                 accuracy of the estimation with no additional cost. We
                 conduct extensive experimental evaluation on real-world
                 OSNs to confirm our theoretical claims. The experiment
                 results show that our estimators are unbiased,
                 accurate, efficient, and better than the
                 state-of-the-art algorithms. For the Weibo graph with
                 more than 58 million nodes, our method produces
                 estimate of triangle count with an error less than 5\%
                 using only 20,000 sampled nodes. Detailed comparison
                 with the state-of-the-art methods demonstrates that our
                 algorithm is 2--10 times more accurate.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "41",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2018:CGM,
  author =       "Hongfu Liu and Yun Fu",
  title =        "Consensus Guided Multi-View Clustering",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "42:1--42:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3182384",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In recent decades, tremendous emerging techniques
                 thrive the artificial intelligence field due to the
                 increasing collected data captured from multiple
                 sensors. These multi-view data provide more rich
                 information than traditional single-view data. Fusing
                 heterogeneous information for certain tasks is a core
                 part of multi-view learning, especially for multi-view
                 clustering. Although numerous multi-view clustering
                 algorithms have been proposed, most scholars focus on
                 finding the common space of different views, but
                 unfortunately ignore the benefits from partition level
                 by ensemble clustering. For ensemble clustering,
                 however, there is no interaction between individual
                 partitions from each view and the final consensus one.
                 To fill the gap, we propose a Consensus Guided
                 Multi-View Clustering (CMVC) framework, which
                 incorporates the generation of basic partitions from
                 each view and fusion of consensus clustering in an
                 interactive way, i.e., the consensus clustering guides
                 the generation of basic partitions, and high quality
                 basic partitions positively contribute to the consensus
                 clustering as well. We design a non-trivial
                 optimization solution to formulate CMVC into two
                 iterative $k$-means clusterings by an approximate
                 calculation. In addition, the generalization of CMVC
                 provides a rich feasibility for different scenarios,
                 and the extension of CMVC with incomplete multi-view
                 clustering further validates the effectiveness for
                 real-world applications. Extensive experiments
                 demonstrate the advantages of CMVC over other widely
                 used multi-view clustering methods in terms of cluster
                 validity, and the robustness of CMVC to some important
                 parameters and incomplete multi-view data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "42",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2018:BGD,
  author =       "Hung-Hsuan Chen",
  title =        "{Behavior2Vec}: Generating Distributed Representations
                 of Users' Behaviors on Products for Recommender
                 Systems",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "43:1--43:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3184454",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Most studies on recommender systems target at
                 increasing the click through rate, and hope that the
                 number of orders will increase as well. We argue that
                 clicking and purchasing an item are different
                 behaviors. Thus, we should probably apply different
                 strategies for different objectives, e.g., increase the
                 click through rate, or increase the order rate. In this
                 article, we propose to generate the distributed
                 representations of users' viewing and purchasing
                 behaviors on an e-commerce website. By leveraging on
                 the cosine distance between the distributed
                 representations of the behaviors on items under
                 different contexts, we can predict a user's next
                 clicking or purchasing item more precisely, compared to
                 several baseline methods. Perhaps more importantly, we
                 found that the distributed representations may help
                 discover interesting analogies among the products. We
                 may utilize such analogies to explain how two products
                 are related, and eventually apply different
                 recommendation strategies under different scenarios. We
                 developed the Behavior2Vec library for demonstration.
                 The library can be accessed at
                 https://github.com/ncu-dart/behavior2vec/.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "43",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Saha:2018:EMO,
  author =       "Sriparna Saha and Sayantan Mitra and Stefan Kramer",
  title =        "Exploring Multiobjective Optimization for Multiview
                 Clustering",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "44:1--44:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3182181",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We present a new multiview clustering approach based
                 on multiobjective optimization. In contrast to existing
                 clustering algorithms based on multiobjective
                 optimization, it is generally applicable to data
                 represented by two or more views and does not require
                 specifying the number of clusters a priori. The
                 approach builds upon the search capability of a
                 multiobjective simulated annealing based technique,
                 AMOSA, as the underlying optimization technique. In the
                 first version of the proposed approach, an internal
                 cluster validity index is used to assess the quality of
                 different partitionings obtained using different views.
                 A new way of checking the compatibility of these
                 different partitionings is also proposed and this is
                 used as another objective function. A new encoding
                 strategy and some new mutation operators are
                 introduced. Finally, a new way of computing a consensus
                 partitioning from multiple individual partitions
                 obtained on multiple views is proposed. As a baseline
                 and for comparison, two multiobjective based ensemble
                 clustering techniques are proposed to combine the
                 outputs of different simple clustering approaches. The
                 efficacy of the proposed clustering methods is shown
                 for partitioning several real-world datasets having
                 multiple views. To show the practical usefulness of the
                 method, we present results on web-search result
                 clustering, where the task is to find a suitable
                 partitioning of web snippets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wu:2018:GRS,
  author =       "Hao Wu and Yue Ning and Prithwish Chakraborty and
                 Jilles Vreeken and Nikolaj Tatti and Naren
                 Ramakrishnan",
  title =        "Generating Realistic Synthetic Population Datasets",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "45:1--45:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3182383",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Modern studies of societal phenomena rely on the
                 availability of large datasets capturing attributes and
                 activities of synthetic, city-level, populations. For
                 instance, in epidemiology, synthetic population
                 datasets are necessary to study disease propagation and
                 intervention measures before implementation. In social
                 science, synthetic population datasets are needed to
                 understand how policy decisions might affect
                 preferences and behaviors of individuals. In public
                 health, synthetic population datasets are necessary to
                 capture diagnostic and procedural characteristics of
                 patient records without violating confidentialities of
                 individuals. To generate such datasets over a large set
                 of categorical variables, we propose the use of the
                 maximum entropy principle to formalize a generative
                 model such that in a statistically well-founded way we
                 can optimally utilize given prior information about the
                 data, and are unbiased otherwise. An efficient
                 inference algorithm is designed to estimate the maximum
                 entropy model, and we demonstrate how our approach is
                 adept at estimating underlying data distributions. We
                 evaluate this approach against both simulated data and
                 US census datasets, and demonstrate its feasibility
                 using an epidemic simulation application.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "45",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{LaFond:2018:DSC,
  author =       "Timothy {La Fond} and Jennifer Neville and Brian
                 Gallagher",
  title =        "Designing Size Consistent Statistics for Accurate
                 Anomaly Detection in Dynamic Networks",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "46:1--46:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3185059",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "An important task in network analysis is the detection
                 of anomalous events in a network time series. These
                 events could merely be times of interest in the network
                 timeline or they could be examples of malicious
                 activity or network malfunction. Hypothesis testing
                 using network statistics to summarize the behavior of
                 the network provides a robust framework for the anomaly
                 detection decision process. Unfortunately, choosing
                 network statistics that are dependent on confounding
                 factors like the total number of nodes or edges can
                 lead to incorrect conclusions (e.g., false positives
                 and false negatives). In this article, we describe the
                 challenges that face anomaly detection in dynamic
                 network streams regarding confounding factors. We also
                 provide two solutions to avoiding error due to
                 confounding factors: the first is a randomization
                 testing method that controls for confounding factors,
                 and the second is a set of size-consistent network
                 statistics that avoid confounding due to the most
                 common factors, edge count and node count.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "46",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Nesa:2018:IIG,
  author =       "Nashreen Nesa and Tania Ghosh and Indrajit Banerjee",
  title =        "{iGRM}: Improved Grey Relational Model and Its
                 Ensembles for Occupancy Sensing in {Internet} of Things
                 Applications",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "47:1--47:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3186268",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Occupancy detection is one of the many applications of
                 Building Automation Systems (BAS) or Heating,
                 Ventilation, and Air Conditioning (HVAC) control
                 systems, especially, with the rising demand of Internet
                 of Things (IoT) services. This article describes the
                 fusion of data collected from sensors by exploiting
                 their potential to sense occupancy in a room. For this
                 purpose, a sensor test bed is deployed that includes
                 four sensors measuring temperature, relative humidity,
                 distance from the first obstacle, and light along with
                 an Arduino micro-controller to validate our model. In
                 addition, this article proposes three algorithms for
                 efficient fusion of the sensor data that is inspired by
                 the Grey theory. An improved Grey Relational Model
                 (iGRM) is proposed, which acts as the base classifier
                 for the other two algorithms, namely, Grey Relational
                 Model with Bagging (iGRM-BG) and Grey Relational Model
                 with Boosting (iGRM-BT). Furthermore, all three
                 algorithms use a sliding window concept, where only the
                 samples inside the window participate in model
                 training. Also, we have considered varying window
                 sizes for optimal comparison. The algorithms were
                 tested against the experimental data collected through
                 a test bed as well as on a publicly available large
                 dataset, where both the ensemble models, iGRM-BG and
                 iGRM-BT, are seen to enhance the performance of iGRM.
                 The results reveal exceptionally high performances with
                 accuracies above 95\% (iGRM) and up to 100\% (iGRM-BT)
                 for the experimental dataset and above 98.24\% (iGRM)
                 and up to 99.49\% (iGRM-BG) using the publicly
                 available dataset. Among the three proposed models,
                 iGRM-BG was observed to outperform both iGRM and
                 iGRM-BT owing to its advantage of being an ensemble
                 model and its robustness against over-fitting.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "47",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bressan:2018:MCB,
  author =       "Marco Bressan and Flavio Chierichetti and Ravi Kumar
                 and Stefano Leucci and Alessandro Panconesi",
  title =        "{Motif} Counting Beyond Five Nodes",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "48:1--48:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3186586",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Counting graphlets is a well-studied problem in graph
                 mining and social network analysis. Recently, several
                 papers explored very simple and natural algorithms
                 based on Monte Carlo sampling of Markov Chains (MC),
                 and reported encouraging results. We show, perhaps
                 surprisingly, that such algorithms are outperformed by
                 color coding (CC), a sophisticated algorithmic
                 technique that we extend to the case of graphlet
                 sampling and for which we prove strong statistical
                 guarantees. Our computational experiments on graphs
                 with millions of nodes show CC to be more accurate than
                 MC; furthermore, we formally show that the mixing time
                 of the MC approach is too high in general, even when
                 the input graph has high conductance. All this comes at
                 a price however. While MC is very efficient in terms of
                 space, CC's memory requirements become demanding when
                 the size of the input graph and that of the graphlets
                 grow. And yet, our experiments show that CC can push
                 the limits of the state-of-the-art, both in terms of
                 the size of the input graph and of that of the
                 graphlets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "48",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Nguyen:2018:EUP,
  author =       "Minh-Tien Nguyen and Duc-Vu Tran and Le-Minh Nguyen
                 and Xuan-Hieu Phan",
  title =        "Exploiting User Posts for {Web} Document
                 Summarization",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "49:1--49:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3186566",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Relevant user posts such as comments or tweets of a
                 Web document provide additional valuable information to
                 enrich the content of this document. When creating user
                 posts, readers tend to borrow salient words or phrases
                 in sentences. This can be considered as word variation.
                 This article proposes a framework that models the word
                 variation aspect to enhance the quality of Web document
                 summarization. Technically, the framework consists of
                 two steps: scoring and selection. In the first step,
                 the social information of a Web document such as user
                 posts is exploited to model intra-relations and
                 inter-relations in lexical and semantic levels. These
                 relations are denoted by a mutual reinforcement
                 similarity graph used to score each sentence and user
                 post. After scoring, summaries are extracted by using a
                 ranking approach or concept-based method formulated in
                 the form of Integer Linear Programming. To confirm the
                 efficiency of our framework, sentence and story
                 highlight extraction tasks were taken as a case study
                 on three datasets in two languages, English and
                 Vietnamese. Experimental results show that: (i) the
                 framework can improve ROUGE-scores compared to
                 state-of-the-art baselines of social context
                 summarization and (ii) the combination of the two
                 relations benefits the sentence extraction of single
                 Web documents.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "49",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2018:ERC,
  author =       "Bo Li and Yevgeniy Vorobeychik",
  title =        "Evasion-Robust Classification on Binary Domains",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "50:1--50:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3186282",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The success of classification learning has led to
                 numerous attempts to apply it in adversarial settings
                 such as spam and malware detection. The core challenge
                 in this class of applications is that adversaries are
                 not static, but make a deliberate effort to evade the
                 classifiers. We investigate both the problem of
                 modeling the objectives of such adversaries, as well as
                 the algorithmic problem of accounting for rational,
                 objective-driven adversaries. We first present a
                 general approach based on mixed-integer linear
                 programming (MILP) with constraint generation. This
                 approach is the first to compute an optimal solution to
                 adversarial loss minimization for two general classes
                 of adversarial evasion models in the context of binary
                 feature spaces. To further improve scalability and
                 significantly generalize the scope of the MILP-based
                 method, we propose a principled iterative retraining
                 framework, which can be used with arbitrary classifiers
                 and essentially arbitrary attack models. We show that
                 the retraining approach, when it converges, minimizes
                 an upper bound on adversarial loss. Extensive
                 experiments demonstrate that the mixed-integer
                 programming approach significantly outperforms several
                 state-of-the-art adversarial learning alternatives.
                 Moreover, the retraining framework performs nearly as
                 well, but scales significantly better. Finally, we show
                 that our approach is robust to misspecifications of the
                 adversarial model.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "50",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Mohammadi:2018:COA,
  author =       "Majid Mohammadi and Amir Ahooye Atashin and Wout
                 Hofman and Yaohua Tan",
  title =        "Comparison of Ontology Alignment Systems Across Single
                 Matching Task Via the {McNemar's} Test",
  journal =      j-TKDD,
  volume =       "12",
  number =       "4",
  pages =        "51:1--51:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3193573",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Ontology alignment is widely used to find the
                 correspondences between different ontologies in diverse
                 fields. After discovering the alignments, several
                 performance scores are available to evaluate them. The
                 scores typically require the identified alignment and a
                 reference containing the underlying actual
                 correspondences of the given ontologies. The current
                 trend in the alignment evaluation is to put forward a
                 new score (e.g., precision, weighted precision,
                 semantic precision, etc.) and to compare various
                 alignments by juxtaposing the obtained scores. However,
                 it is substantially provocative to select one measure
                 among others for comparison. On top of that, claiming
                 that one system has a better performance than another
                 cannot be substantiated solely by comparing two
                 scalars. In this article, we propose the statistical
                 procedures that enable us to theoretically favor one
                 system over another. The McNemar's test is the
                 statistical means by which the comparison of two
                 ontology alignment systems over one matching task is
                 drawn. The test applies to a 2 $ \times $ 2 contingency
                 table, which can be constructed in two different ways
                 based on the alignments, each of which has their own
                 merits/pitfalls. The ways of the contingency table
                 construction and various apposite statistics from the
                 McNemar's test are elaborated in minute detail. In the
                 case of having more than two alignment systems for
                 comparison, the family wise error rate is expected to
                 happen. Thus, the ways of preventing such an error are
                 also discussed. A directed graph visualizes the outcome
                 of the McNemar's test in the presence of multiple
                 alignment systems. From this graph, it is readily
                 understood if one system is better than another or
                 if their differences are imperceptible. The proposed
                 statistical methodologies are applied to the systems
                 that participated in the OAEI 2016 anatomy track, and
                 are also used to compare several well-known similarity
                 metrics for the same matching problem.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "51",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Lines:2018:TSC,
  author =       "Jason Lines and Sarah Taylor and Anthony Bagnall",
  title =        "Time Series Classification with {HIVE}-{COTE}: The
                 Hierarchical Vote Collective of Transformation-Based
                 Ensembles",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "52:1--52:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3182382",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "A recent experimental evaluation assessed 19 time
                 series classification (TSC) algorithms and found that
                 one was significantly more accurate than all others:
                 the Flat Collective of Transformation-based Ensembles
                 (Flat-COTE). Flat-COTE is an ensemble that combines 35
                 classifiers over four data representations. However,
                 while comprehensive, the evaluation did not consider
                 deep learning approaches. Convolutional neural networks
                 (CNN) have seen a surge in popularity and are now state
                 of the art in many fields, which raises the question of
                 whether CNNs could be equally transformative for TSC.
                 We implement a benchmark CNN for TSC using a common
                 structure and use results from a TSC-specific CNN from
                 the literature. We compare both to Flat-COTE and find
                 that the collective is significantly more accurate than
                 both CNNs. These results are impressive, but Flat-COTE
                 is not without deficiencies. We significantly improve
                 the collective by proposing a new hierarchical
                 structure with probabilistic voting, defining and
                 including two novel ensemble classifiers built in
                 existing feature spaces, and adding further modules to
                 represent two additional transformation domains. The
                 resulting classifier, the Hierarchical Vote Collective
                 of Transformation-based Ensembles (HIVE-COTE),
                 encapsulates classifiers built on five data
                 representations. We demonstrate that HIVE-COTE is
                 significantly more accurate than Flat-COTE (and all
                 other TSC algorithms that we are aware of) over 100
                 resamples of 85 TSC problems and is the new state of
                 the art for TSC. Further analysis is included through
                 the introduction and evaluation of 3 new case studies
                 and extensive experimentation on 1,000 simulated
                 datasets of 5 different types.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "52",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Amornbunchornvej:2018:CED,
  author =       "Chainarong Amornbunchornvej and Ivan Brugere and
                 Ariana Strandburg-Peshkin and Damien R. Farine and
                 Margaret C. Crofoot and Tanya Y. Berger-Wolf",
  title =        "Coordination Event Detection and Initiator
                 Identification in Time Series Data",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "53:1--53:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3201406",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Behavior initiation is a form of leadership and is an
                 important aspect of social organization that affects
                 the processes of group formation, dynamics, and
                 decision-making in human societies and other social
                 animal species. In this work, we formalize the
                 Coordination Initiator Inference Problem and propose a
                 simple yet powerful framework for extracting periods of
                 coordinated activity and determining individuals who
                 initiated this coordination, based solely on the
                 activity of individuals within a group during those
                 periods. The proposed approach, given arbitrary
                 individual time series, automatically (1) identifies
                 times of coordinated group activity, (2) determines the
                 identities of initiators of those activities, and (3)
                 classifies the likely mechanism by which the group
                 coordination occurred, all of which are novel
                 computational tasks. We demonstrate our framework on
                 both simulated and real-world data: trajectories
                 tracking of animals as well as stock market data. Our
                 method is competitive with existing global leadership
                 inference methods but provides the first approaches for
                 local leadership and coordination mechanism
                 classification. Our results are consistent with
                 ground-truthed biological data and the framework finds
                 many known events in financial data which are not
                 otherwise reflected in the aggregate NASDAQ index. Our
                 method is easily generalizable to any coordinated time
                 series data from interacting entities.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "53",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2018:ESC,
  author =       "Peipei Li and Haixun Wang and Hongsong Li and Xindong
                 Wu",
  title =        "Employing Semantic Context for Sparse Information
                 Extraction Assessment",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "54:1--54:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3201407",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "A huge amount of texts available on the World Wide Web
                 presents an unprecedented opportunity for information
                 extraction (IE). One important assumption in IE is that
                 frequent extractions are more likely to be correct.
                 Sparse IE is hence a challenging task because no matter
                 how big a corpus is, there are extractions supported by
                 only a small amount of evidence in the corpus. However,
                 there is limited research on sparse IE, especially in
                 the assessment of the validity of sparse IEs. Motivated
                 by this, we introduce a lightweight, explicit semantic
                 approach for assessing sparse IE. We first use a
                 large semantic network consisting of millions of
                 concepts, entities, and attributes to explicitly model
                 the context of any semantic relationship. Second, we
                 learn from three semantic contexts using different base
                 classifiers to select an optimal classification model
                 for assessing sparse extractions. Finally, experiments
                 show that as compared with several state-of-the-art
                 approaches, our approach can significantly improve the
                 $F$-score in the assessment of sparse extractions while
                 maintaining the efficiency.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "54",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bollegala:2018:CPM,
  author =       "Danushka Bollegala and Vincent Atanasov and Takanori
                 Maehara and Ken-Ichi Kawarabayashi",
  title =        "{ClassiNet} --- Predicting Missing Features for
                 Short-Text Classification",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "55:1--55:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3201578",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Short and sparse texts such as tweets, search engine
                 snippets, product reviews, and chat messages are
                 abundant on the Web. Classifying such short-texts into
                 a pre-defined set of categories is a common problem
                 that arises in various contexts, such as sentiment
                 classification, spam detection, and information
                 recommendation. The fundamental problem in short-text
                 classification is feature sparseness --- the lack of
                 feature overlap between a trained model and a test
                 instance to be classified. We propose ClassiNet --- a
                 network of classifiers trained for predicting missing
                 features in a given instance, to overcome the feature
                 sparseness problem. Using a set of unlabeled training
                 instances, we first learn binary classifiers as feature
                 predictors for predicting whether a particular feature
                 occurs in a given instance. Next, each feature
                 predictor is represented as a vertex $v_i$ in the
                 ClassiNet, where a one-to-one correspondence exists
                 between feature predictors and vertices. The weight of
                 the directed edge $e_{ij}$ connecting a vertex $v_i$ to
                 a vertex $v_j$ represents the conditional probability
                 that given $v_i$ exists in an instance, $v_j$ also
                 exists in the same instance. We show that ClassiNets
                 generalize word co-occurrence graphs by considering
                 implicit co-occurrences between features. We extract
                 numerous features from the trained ClassiNet to
                 overcome feature sparseness. In particular, for a given
                 instance x, we find similar features from ClassiNet
                 that did not appear in x, and append those features in
                 the representation of x. Moreover, we propose a method
                 based on graph propagation to find features that are
                 indirectly related to a given short-text. We evaluate
                 ClassiNets on several benchmark datasets for short-text
                 classification. Our experimental results show that by
                 using ClassiNet, we can statistically significantly
                 improve the accuracy in short-text classification
                 tasks, without having to use any external resources
                 such as thesauri for finding related features.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "55",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Qin:2018:STR,
  author =       "Tian Qin and Wufan Shangguan and Guojie Song and Jie
                 Tang",
  title =        "Spatio-Temporal Routine Mining on Mobile Phone Data",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "56:1--56:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3201577",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Mining human behaviors has always been an important
                 subarea of Data Mining. While it provides empirical
                 evidences to psychological/behavioral studies, it also
                 builds the foundation of various big-data systems,
                 which rely heavily on the prediction of human
                 behaviors. In recent years, the ubiquitous spreading of
                 mobile phones and the massive amount of spatio-temporal
                 data collected from them make it possible to keep track
                 of the daily commute behaviors of mobile subscribers
                 and further conduct routine mining on them. In this
                 article, we propose to model mobile subscribers' daily
                 commute behaviors by three levels: location trajectory,
                 one-day pattern, and routine pattern. We develop the
                 model Spatio-Temporal Routine Mining Model (STRMM) to
                 characterize the generative process between these three
                 levels. From daily trajectories, the STRMM model
                 unsupervisedly extracts spatio-temporal routine
                 patterns that contain two aspects of information: (1)
                 How people's typical commute patterns are. (2) How much
                 their commute behaviors vary from day to day. Compared
                 to traditional methods, STRMM takes into account the
                 different degrees of behavioral uncertainty in
                 different timespans of a day, yielding more realistic
                 and intuitive results. To learn model parameters, we
                 adopt Stochastic Expectation Maximization algorithm.
                 Experiments are conducted on two real world datasets,
                 and the empirical results show that the STRMM model can
                 effectively discover hidden routine patterns of human
                 commute behaviors and yields higher accuracy results in
                 trajectory prediction task.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "56",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2018:SRI,
  author =       "Ziqi Zhang and Jie Gao and Fabio Ciravegna",
  title =        "{SemRe-Rank}: Improving Automatic Term Extraction by
                 Incorporating Semantic Relatedness with Personalised
                 {PageRank}",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "57:1--57:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3201408",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Automatic Term Extraction (ATE) deals with the
                 extraction of terminology from a domain specific
                 corpus, and has long been an established research area
                 in data and knowledge acquisition. ATE remains a
                 challenging task as it is known that there is no
                 existing ATE methods that can consistently outperform
                 others in any domain. This work adopts a refreshed
                 perspective to this problem: instead of searching for
                 such a `one-size-fit-all' solution that may never
                 exist, we propose to develop generic methods to
                 `enhance' existing ATE methods. We introduce
                 SemRe-Rank, the first method based on this principle,
                 to incorporate semantic relatedness --- an often overlooked
                 venue --- into an existing ATE method to further improve
                 its performance. SemRe-Rank incorporates word
                 embeddings into a personalised PageRank process to
                 compute `semantic importance' scores for candidate
                 terms from a graph of semantically related words
                 (nodes), which are then used to revise the scores of
                 candidate terms computed by a base ATE algorithm.
                 Extensively evaluated with 13 state-of-the-art base ATE
                 methods on four datasets of diverse nature, it is shown
                 to have achieved widespread improvement over all base
                 methods and across all datasets, with up to 15
                 percentage points when measured by the Precision in the
                 top ranked K candidate terms (the average for a set of
                 K's), or up to 28 percentage points in F1 measured at
                 a K that equals to the expected real terms in the
                 candidates (F1 in short). Compared to an alternative
                 approach built on the well-known TextRank algorithm,
                 SemRe-Rank can potentially outperform by up to 8 points
                 in Precision at top K, or up to 17 points in F1.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "57",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Hao:2018:OAL,
  author =       "Shuji Hao and Peiying Hu and Peilin Zhao and Steven C.
                 H. Hoi and Chunyan Miao",
  title =        "Online Active Learning with Expert Advice",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "58:1--58:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3201604",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In literature, learning with expert advice methods
                 usually assume that a learner always obtain the true
                 label of every incoming training instance at the end of
                 each trial. However, in many real-world applications,
                 acquiring the true labels of all instances can be both
                 costly and time consuming, especially for large-scale
                 problems. For example, in the social media, data stream
                 usually comes in a high speed and volume, and it is
                 nearly impossible and highly costly to label all of the
                 instances. In this article, we address this problem
                 with active learning with expert advice, where the
                 ground truth of an instance is disclosed only when it
                 is requested by the proposed active query strategies.
                 Our goal is to minimize the number of requests while
                 training an online learning model without sacrificing
                 the performance. To address this challenge, we propose
                 a framework of active forecasters, which attempts to
                 extend two fully supervised forecasters, Exponentially
                 Weighted Average Forecaster and Greedy Forecaster, to
                 tackle the task of online active learning (OAL) with
                 expert advice. Specifically, we proposed two OAL with
                 expert advice algorithms, named Active Exponentially
                 Weighted Average Forecaster (AEWAF) and active greedy
                 forecaster (AGF), by considering the difference of
                 expert advices. To further improve the robustness of
                 the proposed AEWAF and AGF algorithms in the noisy
                 scenarios (where noisy experts exist), we also proposed
                 two robust active learning with expert advice
                 algorithms, named Robust Active Exponentially Weighted
                 Average Forecaster and Robust Active Greedy Forecaster.
                 We validate the efficacy of the proposed algorithms by
                 an extensive set of experiments in both normal
                 scenarios (where all of experts are comparably
                 reliable) and noisy scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "58",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Silva:2018:DMA,
  author =       "Fabr{\'\i}cio A. Silva and Augusto C. S. A. Domingues
                 and Thais R. M. Braga Silva",
  title =        "Discovering Mobile Application Usage Patterns from a
                 Large-Scale Dataset",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "59:1--59:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3209669",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The discovering of patterns regarding how, when, and
                 where users interact with mobile applications reveals
                 important insights for mobile service providers. In
                 this work, we exploit for the first time a real and
                 large-scale dataset representing the records of mobile
                 application usage of 5,342 users during 2014. The data
                 was collected by a software agent, installed at the
                 users' smartphones, which monitors detailed usage of
                 applications. First, we look for general patterns of
                 how users access some of the most popular mobile
                 applications in terms of frequency, duration,
                 diversity, and data traffic. Next, we mine the dataset
                 looking for temporal patterns in terms of when and how
                 often accesses occur. Finally, we exploit the location
                 of each access to detect users' points of interest and
                 location-based communities. Based on the results, we
                 derive a model to generate synthetic datasets of mobile
                 application usage and evaluate solutions to predict the
                 next application to be launched. We also discuss a
                 series of implications of the findings regarding
                 telecommunication services, mobile advertisements, and
                 smart cities. This is the first time this dataset is
                 used, and we also make it publicly available for other
                 researchers.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "59",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2018:CQE,
  author =       "Feijiang Li and Yuhua Qian and Jieting Wang and
                 Chuangyin Dang and Bing Liu",
  title =        "{Cluster}'s Quality Evaluation and Selective
                 Clustering Ensemble",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "60:1--60:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3211872",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Clustering ensemble has drawn much attention in recent
                 years due to its ability to generate a high quality and
                 robust partition result. Weighted clustering ensemble
                 and selective clustering ensemble are two general ways
                 to further improve the performance of a clustering
                 ensemble method. Existing weighted clustering ensemble
                 methods assign the same weight to each cluster in a
                 partition of the ensemble. Since the qualities of the
                 clusters in a partition are different, the clusters
                 should be weighted differently. To address this issue,
                 this article proposes a new measure to calculate the
                 similarity between a cluster and a partition.
                 Theoretically, this measure is effective in handling
                 two problems in measuring the quality of a cluster,
                 which are defined as the symmetric problem and the
                 context meaning problem. In addition, some properties
                 of the proposed measure are analyzed. This measure can
                 be easily expanded to a clustering performance measure
                 that calculates the similarity between two partitions.
                 As a result of this measure, we propose a novel
                 selective clustering ensemble framework, which
                 considers the differences between the objective of the
                 ensemble selection stage and the object of the ensemble
                 integration stage in the selective clustering ensemble.
                 To verify the performance of the new measure, we
                 compare the performance of the measure with the two
                 existing measures in weighting clusters. The
                 experiments show that the proposed measure is more
                 effective. To verify the performance of the novel
                 framework, four existing state-of-the-art selective
                 clustering ensemble frameworks are employed as
                 references. The experiments show that the proposed
                 framework is statistically better than the others on 17
                 UCI benchmark datasets, 8 document datasets, and the
                 Olivetti Face Database.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "60",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Riondato:2018:AAB,
  author =       "Matteo Riondato and Eli Upfal",
  title =        "{ABRA}: Approximating Betweenness Centrality in Static
                 and Dynamic Graphs with {Rademacher} Averages",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "61:1--61:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3208351",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "ABPA$ \Xi $A$ \Sigma $ (ABRAXAS): Gnostic word of
                 mystic meaning. We present ABRA, a suite of algorithms
                 to compute and maintain probabilistically guaranteed
                 high-quality approximations of the betweenness
                 centrality of all nodes (or edges) on both static and
                 fully dynamic graphs. Our algorithms use progressive
                 random sampling and their analysis rely on Rademacher
                 averages and pseudodimension, fundamental concepts from
                 statistical learning theory. To our knowledge, ABRA is
                 the first application of these concepts to the field of
                 graph analysis. Our experimental results show that ABRA
                 is much faster than exact methods, and vastly
                 outperforms, in both runtime number of samples, and
                 accuracy, state-of-the-art algorithms with the same
                 quality guarantees.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "61",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{DosSantos:2018:RLC,
  author =       "Ludovic {Dos Santos} and Benjamin Piwowarski and
                 Ludovic Denoyer and Patrick Gallinari",
  title =        "Representation Learning for Classification in
                 Heterogeneous Graphs with Application to Social
                 Networks",
  journal =      j-TKDD,
  volume =       "12",
  number =       "5",
  pages =        "62:1--62:??",
  month =        jul,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3201603",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:46 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "We address the task of node classification in
                 heterogeneous networks, where the nodes are of
                 different types, each type having its own set of
                 labels, and the relations between nodes may also be of
                 different types. A typical example is provided by
                 social networks where node types may for example be
                 users, content, or films, and relations friendship,
                 like, authorship. Learning and performing inference on
                 such heterogeneous networks is a recent task requiring
                 new models and algorithms. We propose a model, Labeling
                 Heterogeneous Network (LaHNet), a transductive approach
                 to classification that learns to project the different
                 types of nodes into a common latent space. This
                 embedding is learned so as to reflect different
                 characteristics of the problem such as the correlation
                 between node labels, as well as the graph topology. The
                 application focus is on social graphs, but the
                 algorithm is general and can be used for other domains.
                 The model is evaluated on five datasets representative
                 of different instances of social data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "62",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2018:CCE,
  author =       "Can Wang and Chi-Hung Chi and Zhong She and Longbing
                 Cao and Bela Stantic",
  title =        "Coupled Clustering Ensemble by Exploring Data
                 Interdependence",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "63:1--63:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3230967",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Clustering ensembles combine multiple partitions of
                 data into a single clustering solution. It is an
                 effective technique for improving the quality of
                 clustering results. Current clustering ensemble
                 algorithms are usually built on the pairwise agreements
                 between clusterings that focus on the similarity via
                 consensus functions, between data objects that induce
                 similarity measures from partitions and re-cluster
                 objects, and between clusters that collapse groups of
                 clusters into meta-clusters. In most of those models,
                 there is a strong assumption on IIDness (i.e.,
                 independent and identical distribution), which states
                 that base clusterings perform independently of one
                 another and all objects are also independent. In the
                 real world, however, objects are generally likely
                 related to each other through features that are either
                 explicit or even implicit. There is also latent but
                 definite relationship among intermediate base
                 clusterings because they are derived from the same set
                 of data. All these demand a further investigation of
                 clustering ensembles that explores the interdependence
                 characteristics of data. To solve this problem, a new
                 coupled clustering ensemble (CCE) framework that
                 works on the interdependence nature of objects and
                 intermediate base clusterings is proposed in this
                 article. The main idea is to model the coupling
                 relationship between objects by aggregating the
                 similarity of base clusterings, and the interactive
                 relationship among objects by addressing their
                 neighborhood domains. Once these interdependence
                 relationships are discovered, they will act as critical
                 supplements to clustering ensembles. We verified our
                 proposed framework by using three types of consensus
                 function: clustering-based, object-based, and
                 cluster-based. Substantial experiments on multiple
                 synthetic and real-life benchmark datasets indicate
                 that CCE can effectively capture the implicit
                 interdependence relationships among base clusterings
                 and among objects with higher clustering accuracy,
                 stability, and robustness compared to 14
                 state-of-the-art techniques, supported by statistical
                 analysis. In addition, we show that the final
                 clustering quality is dependent on the data
                 characteristics (e.g., quality and consistency) of base
                 clusterings in terms of sensitivity analysis. Finally,
                 the applications in document clustering, as well as on
                 the datasets with much larger size and dimensionality,
                 further demonstrate the effectiveness, efficiency, and
                 scalability of our proposed models.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "63",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Huang:2018:EBQ,
  author =       "Zhipeng Huang and Bogdan Cautis and Reynold Cheng and
                 Yudian Zheng and Nikos Mamoulis and Jing Yan",
  title =        "Entity-Based Query Recommendation for Long-Tail
                 Queries",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "64:1--64:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3233186",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Query recommendation, which suggests related queries
                 to search engine users, has attracted a lot of
                 attention in recent years. Most of the existing
                 solutions, which perform analysis of users' search
                 history (or query logs), are often insufficient for
                 long-tail queries that rarely appear in query logs. To
                 handle such queries, we study the use of entities found
                 in queries to provide recommendations. Specifically, we
                 extract entities from a query, and use these entities
                 to explore new ones by consulting an information
                 source. The discovered entities are then used to
                 suggest new queries to the user. In this article, we
                 examine two information sources: (1) a knowledge base
                 (or KB), such as YAGO and Freebase; and (2) a click
                 log, which contains the URLs accessed by a query user.
                 We study how to use these sources to find new entities
                 useful for query recommendation. We further study a
                 hybrid framework that integrates different query
                 recommendation methods effectively. As shown in the
                 experiments, our proposed approaches provide better
                 recommendations than existing solutions for long-tail
                 queries. In addition, our query recommendation process
                 takes less than 100 ms to complete. Thus, our solution
                 is suitable for providing online query recommendation
                 services for search engines.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "64",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2018:MAD,
  author =       "Xiaoli Liu and Peng Cao and Andr{\'e} R.
                 Gon{\c{c}}alves and Dazhe Zhao and Arindam Banerjee",
  title =        "Modeling {Alzheimer}'s Disease Progression with Fused
                 {Laplacian} Sparse Group Lasso",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "65:1--65:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3230668",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Alzheimer's disease (AD), the most common type of
                 dementia, not only imposes a huge financial burden on
                 the health care system, but also a psychological and
                 emotional burden on patients and their families. There
                 is thus an urgent need to infer trajectories of
                 cognitive performance over time and identify biomarkers
                 predictive of the progression. In this article, we
                 propose the multi-task learning with fused Laplacian
                 sparse group lasso model, which can identify biomarkers
                 closely related to cognitive measures due to its
                 sparsity-inducing property, and model the disease
                 progression with a general weighted (undirected)
                 dependency graph among the tasks. An efficient
                 alternative directions method of multipliers based
                 optimization algorithm is derived to solve the proposed
                 non-smooth objective formulation. The effectiveness of
                 the proposed model is demonstrated by its superior
                 prediction performance over multiple state-of-the-art
                 methods and accurate identification of compact sets of
                 cognition-relevant imaging biomarkers that are
                 consistent with prior medical studies.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "65",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{He:2018:SRI,
  author =       "Xinran He and David Kempe",
  title =        "Stability and Robustness in Influence Maximization",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "66:1--66:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3233227",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In the well-studied Influence Maximization problem,
                 the goal is to identify a set of $k$ nodes in a social
                 network whose joint influence on the network is
                 maximized. A large body of recent work has justified
                 research on Influence Maximization models and
                 algorithms with their potential to create societal or
                 economic value. However, in order to live up to this
                 potential, the algorithms must be robust to large
                 amounts of noise, for they require quantitative
                 estimates of the influence, which individuals exert on
                 each other; ground truth for such quantities is
                 inaccessible, and even decent estimates are very
                 difficult to obtain. We begin to address this concern
                 formally. First, we exhibit simple inputs on which even
                 very small estimation errors may mislead every
                 algorithm into highly suboptimal solutions. Motivated
                 by this observation, we propose the Perturbation
                 Interval model as a framework to characterize the
                 stability of Influence Maximization against noise in
                 the inferred diffusion network. Analyzing the
                 susceptibility of specific instances to estimation
                 errors leads to a clean algorithmic question, which we
                 term the Influence Difference Maximization problem.
                 However, the objective function of Influence Difference
                 Maximization is NP-hard to approximate within a factor
                 of $ O(n^{(1 - \epsilon)}) $ for any $ \epsilon > 0 $.
                 Given the infeasibility of diagnosing instability
                 algorithmically, we focus on finding influential users
                 robustly across multiple diffusion settings. We define
                 a Robust Influence Maximization framework wherein an
                 algorithm is presented with a set of influence
                 functions. The algorithm's goal is to identify a set of
                 $k$ nodes who are simultaneously influential for all
                 influence functions, compared to the
                 (function-specific) optimum solutions. We show strong
                 approximation hardness results for this problem unless
                 the algorithm gets to select at least a logarithmic
                 factor more seeds than the optimum solution. However,
                 when enough extra seeds may be selected, we show that
                 techniques of Krause et al. can be used to approximate
                 the optimum robust influence to within a factor of $ 1
                 - 1 / e $. We evaluate this bicriteria approximation
                 algorithm against natural heuristics on several
                 real-world datasets. Our experiments indicate that the
                 worst-case hardness does not necessarily translate into
                 bad performance on real-world datasets; all algorithms
                 perform fairly well.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "66",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Romero-Tris:2018:PPT,
  author =       "Cristina Romero-Tris and David Meg{\'\i}as",
  title =        "Protecting Privacy in Trajectories with a User-Centric
                 Approach",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "67:1--67:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3233185",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "The increased use of location-aware devices, such as
                 smartphones, generates a large amount of trajectory
                 data. These data can be useful in several domains, like
                 marketing, path modeling, localization of an epidemic
                 focus, and so on. Nevertheless, since trajectory
                 information contains personal mobility data, improper
                 use or publication of trajectory data can threaten
                 users' privacy. It may reveal sensitive details like
                 habits of behavior, religious beliefs, and sexual
                 preferences. Therefore, many users might be unwilling
                 to share their trajectory data without a previous
                 anonymization process. Currently, several proposals to
                 address this problem can be found in the literature.
                 These solutions focus on anonymizing data before its
                 publication, i.e., when they are already stored in the
                 server database. Nevertheless, we argue that this
                 approach gives the user no control about the
                 information she shares. For this reason, we propose
                 anonymizing data in the users' mobile devices, before
                 they are sent to a third party. This article extends
                 our previous work which was, to the best of our
                 knowledge, the first one to anonymize data at the
                 client side, allowing users to select the amount and
                 accuracy of shared data. In this article, we describe
                 an improved version of the protocol, and we include the
                 implementation together with an analysis of the results
                 obtained after the simulation with real trajectory
                 data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "67",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ying:2018:FIG,
  author =       "Josh Jia-Ching Ying and Ji Zhang and Che-Wei Huang and
                 Kuan-Ta Chen and Vincent S. Tseng",
  title =        "{FrauDetector+}: an Incremental Graph-Mining Approach
                 for Efficient Fraudulent Phone Call Detection",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "68:1--68:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3234943",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "In recent years, telecommunication fraud has become
                 more rampant internationally with the development of
                 modern technology and global communication. Because of
                 rapid growth in the volume of call logs, the task of
                 fraudulent phone call detection is confronted with big
                 data issues in real-world implementations. Although our
                 previous work, FrauDetector, addressed this problem and
                 achieved some promising results, it can be further
                 enhanced because it focuses only on fraud detection
                 accuracy, whereas the efficiency and scalability are
                 not top priorities. Other known approaches for
                 fraudulent call number detection suffer from long
                 training times or cannot accurately detect fraudulent
                 phone calls in real time. However, the learning process
                 of FrauDetector is too time-consuming to support
                 real-world application. Although we have attempted to
                 accelerate the learning process of FrauDetector by
                 parallelization, the parallelized learning process,
                 namely PFrauDetector, still cannot afford the computing
                 cost. In this article, we propose a highly efficient
                 incremental graph-mining-based fraudulent phone call
                 detection approach, namely FrauDetector$^+$, which can
                 automatically label fraudulent phone numbers with a
                 ``fraud'' tag---a crucial prerequisite for distinguishing
                 fraudulent phone call numbers from nonfraudulent ones.
                 FrauDetector$^+$ initially generates smaller, more
                 manageable subnetworks from original graph and performs
                 a parallelized weighted HITS algorithm for a
                 significant speed increase in the graph learning
                 module. It adopts a novel aggregation approach to
                 generate a trust (or experience) value for each phone
                 number (or user) based on their respective local
                 values. After the initial procedure, we can
                 incrementally update the trust (or experience) value
                 for each phone number (or user) while a new fraud phone
                 number is identified. An efficient fraud-centric hash
                 structure is constructed to support fast real-time
                 detection of fraudulent phone numbers in the detection
                 module. We conduct a comprehensive experimental study
                 based on real datasets collected through an antifraud
                 mobile application called Whoscall. The results
                 demonstrate a significantly improved efficiency of our
                 approach compared with FrauDetector as well as superior
                 performance against other major classifier-based
                 methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "68",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Di:2018:LSA,
  author =       "Mingyang Di and Diego Klabjan and Long Sha and Patrick
                 Lucey",
  title =        "Large-Scale Adversarial Sports Play Retrieval with
                 Learning to Rank",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "69:1--69:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3230667",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "As teams of professional leagues are becoming more and
                 more analytically driven, the interest in effective
                 data management and access of sports plays has
                 dramatically increased. In this article, we present a
                 retrieval system that can quickly find the most
                 relevant plays from historical games given an input
                 query. To search through a large number of games at an
                 interactive speed, our system is built upon a
                 distributed framework so that each query-result pair is
                 evaluated in parallel. We also propose a pairwise
                 learning to rank approach to improve search ranking
                 based on users' clickthrough behavior. The similarity
                 metric in training the rank function is based on
                 automatically learnt features from a convolutional
                 autoencoder. Finally, we showcase the efficacy of our
                 learning to rank approach by demonstrating rank quality
                 in a user study.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "69",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Huang:2018:GEF,
  author =       "Xiao Huang and Jundong Li and Na Zou and Xia Hu",
  title =        "A General Embedding Framework for Heterogeneous
                 Information Learning in Large-Scale Networks",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "70:1--70:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3241063",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Network analysis has been widely applied in many
                 real-world tasks, such as gene analysis and targeted
                 marketing. To extract effective features for these
                 analysis tasks, network embedding automatically learns
                 a low-dimensional vector representation for each node,
                 such that the meaningful topological proximity is well
                 preserved. While the embedding algorithms on pure
                 topological structure have attracted considerable
                 attention, in practice, nodes are often abundantly
                 accompanied with other types of meaningful information,
                 such as node attributes, second-order proximity, and
                 link directionality. A general framework for
                 incorporating the heterogeneous information into
                 network embedding could be potentially helpful in
                 learning better vector representations. However, it
                 remains a challenging task to jointly embed the
                 geometrical structure and a distinct type of
                 information due to the heterogeneity. In addition, the
                 real-world networks often contain a large number of
                 nodes, which put demands on the scalability of the
                 embedding algorithms. To bridge the gap, in this
                 article, we propose a general embedding framework named
                 Heterogeneous Information Learning in Large-scale
                 networks (HILL) to accelerate the joint learning. It
                 enables the simultaneous node proximity assessing
                 process to be done in a distributed manner by
                 decomposing the complex modeling and optimization into
                 many simple and independent sub-problems. We validate
                 the significant correlation between the heterogeneous
                 information and topological structure, and illustrate
                 the generalizability of HILL by applying it to perform
                 attributed network embedding and second-order proximity
                 learning. A variation is proposed for link
                 directionality modeling. Experimental results on
                 real-world networks demonstrate the effectiveness and
                 efficiency of HILL.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "70",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Park:2018:ETS,
  author =       "Ha-Myung Park and Francesco Silvestri and Rasmus Pagh
                 and Chin-Wan Chung and Sung-Hyon Myaeng and U. Kang",
  title =        "Enumerating Trillion Subgraphs on Distributed
                 Systems",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "71:1--71:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3237191",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "How can we find patterns from an enormous graph with
                 billions of vertices and edges? The subgraph
                 enumeration, which is to find patterns from a graph, is
                 an important task for graph data analysis with many
                 applications, including analyzing the social network
                 evolution, measuring the significance of motifs in
                 biological networks, observing the dynamics of
                 Internet, and so on. Especially, the triangle
                 enumeration, a special case of the subgraph
                 enumeration, where the pattern is a triangle, has many
                 applications such as identifying suspicious users in
                 social networks, detecting web spams, and finding
                 communities. However, recent networks are so large that
                 most of the previous algorithms fail to process them.
                 Recently, several MapReduce algorithms have been
                 proposed to address such large networks; however, they
                 suffer from the massive shuffled data resulting in a
                 very long processing time. In this article, we propose
                 scalable methods for enumerating trillion subgraphs on
                 distributed systems. We first propose PTE
                 (Pre-partitioned Triangle Enumeration), a new
                 distributed algorithm for enumerating triangles in
                 enormous graphs by resolving the structural
                 inefficiency of the previous MapReduce algorithms. PTE
                 enumerates trillions of triangles in a billion scale
                 graph by decreasing three factors: the amount of
                 shuffled data, total work, and network read. We also
                 propose PSE (Pre-partitioned Subgraph Enumeration), a
                 generalized version of PTE for enumerating subgraphs
                 that match an arbitrary query graph. Experimental
                 results show that PTE provides 79 times faster
                 performance than recent distributed algorithms on
                 real-world graphs, and succeeds in enumerating more
                 than 3 trillion triangles on the ClueWeb12 graph with
                 6.3 billion vertices and 72 billion edges. Furthermore,
                 PSE successfully enumerates 265 trillion clique
                 subgraphs with 4 vertices from a subdomain hyperlink
                 network, showing 47 times faster performance than the
                 state of the art distributed subgraph enumeration
                 algorithm.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "71",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wen:2018:EAD,
  author =       "Xidao Wen and Yu-Ru Lin and Konstantinos Pelechrinis",
  title =        "Event Analytics via Discriminant Tensor
                 Factorization",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "72:1--72:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3184455",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Analyzing the impact of disastrous events has been
                 central to understanding and responding to crises.
                 Traditionally, the assessment of disaster impact has
                 primarily relied on the manual collection and analysis
                 of surveys and questionnaires as well as the review of
                 authority reports. This can be costly and
                 time-consuming, whereas a timely assessment of an
                 event's impact is critical for crisis management and
                 humanitarian operations. In this work, we formulate the
                 impact discovery as the problem to identify the shared
                 and discriminative subspace via tensor factorization
                 due to the multi-dimensional nature of mobility data.
                 Existing work in mining the shared and discriminative
                 subspaces typically requires the predefined number of
                 either type of them. In the context of event impact
                 discovery, this could be impractical, especially for
                 those unprecedented events. To overcome this, we
                 propose a new framework, called ``PairFac,'' that
                 jointly factorizes the multi-dimensional data to
                 discover the latent mobility pattern along with its
                 associated discriminative weight. This framework does
                 not require splitting the shared and discriminative
                 subspaces in advance and at the same time automatically
                 captures the persistent and changing patterns from
                 multi-dimensional behavioral data. Our work has
                 important applications in crisis management and urban
                 planning, which provides a timely assessment of impacts
                 of major events in the urban environment.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "72",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2018:SSL,
  author =       "Chaochao Chen and Kevin Chen-Chuan Chang and Qibing Li
                 and Xiaolin Zheng",
  title =        "Semi-supervised Learning Meets Factorization: Learning
                 to Recommend with Chain Graph Model",
  journal =      j-TKDD,
  volume =       "12",
  number =       "6",
  pages =        "73:1--73:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3264745",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Recently, latent factor model (LFM) has been drawing
                 much attention in recommender systems due to its good
                 performance and scalability. However, existing LFMs
                 predict missing values in a user-item rating matrix
                 only based on the known ones, and thus the sparsity of
                 the rating matrix always limits their performance.
                 Meanwhile, semi-supervised learning (SSL) provides an
                 effective way to alleviate the label (i.e., rating)
                 sparsity problem by performing label propagation, which
                 is mainly based on the smoothness insight on affinity
                 graphs. However, graph-based SSL suffers serious
                 scalability and graph unreliable problems when directly
                 being applied to do recommendation. In this article, we
                 propose a novel probabilistic chain graph model (CGM)
                 to marry SSL with LFM. The proposed CGM is a
                 combination of Bayesian network and Markov random
                 field. The Bayesian network is used to model the rating
                 generation and regression procedures, and the Markov
                 random field is used to model the confidence-aware
                 smoothness constraint between the generated ratings.
                 Experimental results show that our proposed CGM
                 significantly outperforms the state-of-the-art
                 approaches in terms of four evaluation metrics, and
                 with a larger performance margin when data sparsity
                 increases.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "73",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Siddiqui:2019:SFE,
  author =       "Md Amran Siddiqui and Alan Fern and Thomas G.
                 Dietterich and Weng-Keen Wong",
  title =        "Sequential Feature Explanations for Anomaly
                 Detection",
  journal =      j-TKDD,
  volume =       "13",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3230666",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3230666",
  abstract =     "In many applications, an anomaly detection system
                 presents the most anomalous data instance to a human
                 analyst, who then must determine whether the instance
                 is truly of interest (e.g., a threat in a security
                 setting). Unfortunately, most anomaly detectors provide
                 no explanation about why an instance was considered
                 anomalous, leaving the analyst with no guidance about
                 where to begin the investigation. To address this
                 issue, we study the problems of computing and
                 evaluating sequential feature explanations (SFEs) for
                 anomaly detectors. An SFE of an anomaly is a sequence
                 of features, which are presented to the analyst one at
                 a time (in order) until the information contained in
                 the highlighted features is enough for the analyst to
                 make a confident judgement about the anomaly. Since
                 analyst effort is related to the amount of information
                 that they consider in an investigation, an
                 explanation's quality is related to the number of
                 features that must be revealed to attain confidence. In
                 this article, we first formulate the problem of
                 optimizing SFEs for a particular density-based anomaly
                 detector. We then present both greedy algorithms and an
                 optimal algorithm, based on branch-and-bound search,
                 for optimizing SFEs. Finally, we provide a large scale
                 quantitative evaluation of these algorithms using a
                 novel framework for evaluating explanations. The
                 results show that our algorithms are quite effective
                 and that our best greedy algorithm is competitive with
                 optimal solutions.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2019:DDS,
  author =       "Xiaoming Liu and Chao Shen and Xiaohong Guan and
                 Yadong Zhou",
  title =        "Digger: Detect Similar Groups in Heterogeneous Social
                 Networks",
  journal =      j-TKDD,
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  year =         "2019",
  month =        jan,
  volume =       "13",
  number =       "1",
  articleno =    "2",
  pages =        "2:1--2:??",
  DOI =          "https://doi.org/10.1145/3267106",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3267106",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  CODEN =        "????",
  abstract =     "People participate in multiple online social networks,
                 e.g., Facebook, Twitter, and Linkedin, and these social
                 networks with heterogeneous social content and user
                 relationship are named as heterogeneous social
                 networks. Group structure widely exists in
                 heterogeneous social networks, which reveals the
                 evolution of human cooperation. Detecting similar
                 groups in heterogeneous networks has a great
                 significance for many applications, such as
                 recommendation system and spammer detection, using the
                 wealth of group information. Although promising, this
                 novel problem encounters a variety of technical
                 challenges, including incomplete data, high time
                 complexity, and ground truth. To address the research
                 gap and technical challenges, we take advantage of a
                 ratio-cut optimization function to model this novel
                 problem by the linear mixed-effects method and graph
                 spectral theory. Based on this model, we propose an
                 efficient algorithm called Digger to detect the similar
                 groups in the large graphs. Digger consists of three
                 steps, including measuring user similarity, construct a
                 matching graph, and detecting similar groups. We adopt
                 several strategies to lower the computational cost and
                 detail the basis of labeling the ground truth. We
                 evaluate the effectiveness and efficiency of our
                 algorithm on five different types of online social
                 networks. The extensive experiments show that our
                 method achieves 0.693, 0.783, and 0.735 in precision,
                 recall, and F1-measure, which significantly surpass the
                 state-of-arts by 24.4\%, 15.3\%, and 20.7\%,
                 respectively. The results demonstrate that our proposal
                 can detect similar groups in heterogeneous networks
                 effectively.",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
}

@Article{Lagree:2019:AOI,
  author =       "Paul Lagr{\'e}e and Olivier Capp{\'e} and Bogdan
                 Cautis and Silviu Maniu",
  title =        "Algorithms for Online Influencer Marketing",
  journal =      j-TKDD,
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  year =         "2019",
  month =        jan,
  volume =       "13",
  number =       "1",
  articleno =    "3",
  pages =        "3:1--3:??",
  DOI =          "https://doi.org/10.1145/3274670",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3274670",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  CODEN =        "????",
  abstract =     "Influence maximization is the problem of finding
                 influential users, or nodes, in a graph so as to
                 maximize the spread of information. It has many
                 applications in advertising and marketing on social
                 networks. In this article, we study a highly generic
                 version of influence maximization, one of optimizing
                 influence campaigns by sequentially selecting ``spread
                 seeds'' from a set of influencers, a small subset of
                 the node population, under the hypothesis that, in a
                 given campaign, previously activated nodes remain
                 persistently active. This problem is in particular
                 relevant for an important form of online marketing,
                 known as influencer marketing, in which the marketers
                 target a sub-population of influential people, instead
                 of the entire base of potential buyers. Importantly, we
                 make no assumptions on the underlying diffusion model,
                 and we work in a setting where neither a diffusion
                 network nor historical activation data are available.
                 We call this problem online influencer marketing with
                 persistence (in short, OIMP). We first discuss
                 motivating scenarios and present our general approach.
                 We introduce an estimator on the influencers' remaining
                 potential --- the expected number of nodes that can
                 still be reached from a given influencer --- and
                 justify its strength to rapidly estimate the desired
                 value, relying on real data gathered from Twitter. We
                 then describe a novel algorithm, GT-UCB, relying on
                 probabilistic upper confidence bounds on the remaining
                 potential. We show that our approach leads to
                 high-quality spreads on both simulated and real
                 datasets. Importantly, it is orders of magnitude faster
                 than state-of-the-art influence maximization methods,
                 making it possible to deal with large-scale online
                 scenarios.",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
}

@Article{Tao:2019:RSE,
  author =       "Zhiqiang Tao and Hongfu Liu and Sheng Li and Zhengming
                 Ding and Yun Fu",
  title =        "Robust Spectral Ensemble Clustering via Rank
                 Minimization",
  journal =      j-TKDD,
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  year =         "2019",
  month =        jan,
  volume =       "13",
  number =       "1",
  articleno =    "4",
  pages =        "4:1--4:??",
  DOI =          "https://doi.org/10.1145/3278606",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3278606",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  CODEN =        "????",
  abstract =     "Ensemble Clustering (EC) is an important topic for
                 data cluster analysis. It targets to integrate multiple
                 Basic Partitions (BPs) of a particular dataset into a
                 consensus partition. Among previous works, one
                 promising and effective way is to transform EC as a
                 graph partitioning problem on the co-association
                 matrix, which is a pair-wise similarity matrix
                 summarized by all the BPs in essence. However, most
                 existing EC methods directly utilize the co-association
                 matrix, yet without considering various noises (e.g.,
                 the disagreement between different BPs and the
                 outliers) that may exist in it. These noises can impair
                 the cluster structure of a co-association matrix, and
                 thus mislead the final graph partitioning process. To
                 address this challenge, we propose a novel Robust
                 Spectral Ensemble Clustering (RSEC) algorithm in this
                 article. Specifically, we learn low-rank representation
                 (LRR) for the co-association matrix to uncover its
                 cluster structure and handle the noises, and meanwhile,
                 we perform spectral clustering with the learned
                 representation to seek for a consensus partition. These
                 two steps are jointly proceeded within a unified
                 optimization framework. In particular, during the
                 optimizing process, we leverage consensus partition to
                 iteratively enhance the block-diagonal structure of
                 LRR, in order to assist the graph partitioning. To
                 solve RSEC, we first formulate it by using nuclear norm
                 as a convex proxy to the rank function. Then, motivated
                 by the recent advances in non-convex rank minimization,
                 we further develop a non-convex model for RSEC and
                 provide it a solution by the majorization--minimization
                 Augmented Lagrange Multiplier algorithm. Experiments on
                 18 real-world datasets demonstrate the effectiveness of
                 our algorithm compared with state-of-the-art methods.
                 Moreover, several impact factors on the clustering
                 performance of our approach are also explored
                 extensively.",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
}

@Article{Jaysawal:2019:PAP,
  author =       "Bijay Prasad Jaysawal and Jen-Wei Huang",
  title =        "{PSP}-{AMS}: Progressive Mining of Sequential Patterns
                 Across Multiple Streams",
  journal =      j-TKDD,
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  year =         "2019",
  month =        jan,
  volume =       "13",
  number =       "1",
  articleno =    "5",
  pages =        "5:1--5:??",
  DOI =          "https://doi.org/10.1145/3281632",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3281632",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  CODEN =        "????",
  abstract =     "Sequential pattern mining is used to find frequent
                 data sequences over time. When sequential patterns are
                 generated, the newly arriving patterns may not be
                 identified as frequent sequential patterns due to the
                 existence of old data and sequences. Progressive
                 sequential pattern mining aims to find the most
                 up-to-date sequential patterns given that obsolete
                 items will be deleted from the sequences. When
                 sequences come with multiple data streams, it is
                 difficult to maintain and update the current sequential
                 patterns. Even worse, when we consider the sequences
                 across multiple streams, previous methods cannot
                 efficiently compute the frequent sequential patterns.
                 In this work, we propose an efficient algorithm PSP-AMS
                 to address this problem. PSP-AMS uses a novel data
                 structure PSP-MS-tree to insert new items, update
                 current items, and delete obsolete items. By
                 maintaining a PSP-MS-tree, PSP-AMS efficiently finds
                 the frequent sequential patterns across multiple
                 streams. The experimental results show that PSP-AMS
                 significantly outperforms previous algorithms for
                 mining of progressive sequential patterns across
                 multiple streams on synthetic data as well as real
                 data.",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
}

@Article{Song:2019:TCA,
  author =       "Qingquan Song and Hancheng Ge and James Caverlee and
                 Xia Hu",
  title =        "Tensor Completion Algorithms in Big Data Analytics",
  journal =      j-TKDD,
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  year =         "2019",
  month =        jan,
  volume =       "13",
  number =       "1",
  articleno =    "6",
  pages =        "6:1--6:??",
  DOI =          "https://doi.org/10.1145/3278607",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3278607",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  CODEN =        "????",
  abstract =     "Tensor completion is a problem of filling the missing
                 or unobserved entries of partially observed tensors.
                 Due to the multidimensional character of tensors in
                 describing complex datasets, tensor completion
                 algorithms and their applications have received wide
                 attention and achievement in areas like data mining,
                 computer vision, signal processing, and neuroscience.
                 In this survey, we provide a modern overview of recent
                 advances in tensor completion algorithms from the
                 perspective of big data analytics characterized by
                 diverse variety, large volume, and high velocity. We
                 characterize these advances from the following four
                 perspectives: general tensor completion algorithms,
                 tensor completion with auxiliary information (variety),
                 scalable tensor completion algorithms (volume), and
                 dynamic tensor completion algorithms (velocity).
                 Further, we identify several tensor completion
                 applications on real-world data-driven problems and
                 present some common experimental frameworks popularized
                 in the literature along with several available software
                 repositories. Our goal is to summarize these popular
                 methods and introduce them to researchers and
                 practitioners for promoting future research and
                 applications. We conclude with a discussion of key
                 challenges and promising research directions in this
                 community for future exploration.",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
}

@Article{Moghaz:2019:TME,
  author =       "Dror Moghaz and Yaakov Hacohen-Kerner and Dov Gabbay",
  title =        "Text Mining for Evaluating Authors' Birth and Death
                 Years",
  journal =      j-TKDD,
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  year =         "2019",
  month =        jan,
  volume =       "13",
  number =       "1",
  articleno =    "7",
  pages =        "7:1--7:??",
  DOI =          "https://doi.org/10.1145/3281631",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3281631",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  CODEN =        "????",
  abstract =     "This article presents a unique method in text and data
                 mining for finding the era, i.e., mining temporal data,
                 in which an anonymous author was living. Finding this
                 era can assist in the examination of a fake document or
                 extracting the time period in which a writer lived. The
                 study and the experiments concern Hebrew, and in some
                 parts, Aramaic and Yiddish rabbinic texts. The rabbinic
                 texts are undated and contain no bibliographic
                 sections, posing an interesting challenge. This work
                 proposes algorithms using key phrases and key words
                 that allow the temporal organization of citations
                 together with linguistic patterns. Based on these key
                 phrases, key words, and the references, we established
                 several types of ``Iron-clad,'' Heuristic and Greedy
                 rules for estimating the years of birth and death of a
                 writer in an interesting classification task.
                 Experiments were conducted on corpora, including
                 documents authored by 12, 24, and 36 rabbinic writers
                 and demonstrated promising results.",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
}

@Article{Chen:2019:DRW,
  author =       "Hung-Hsuan Chen and Pu Chen",
  title =        "Differentiating Regularization Weights --- A Simple
                 Mechanism to Alleviate Cold Start in Recommender
                 Systems",
  journal =      j-TKDD,
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  year =         "2019",
  month =        jan,
  volume =       "13",
  number =       "1",
  articleno =    "8",
  pages =        "8:1--8:??",
  DOI =          "https://doi.org/10.1145/3285954",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3285954",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  CODEN =        "????",
  abstract =     "Matrix factorization (MF) and its extended
                 methodologies have been studied extensively in the
                 community of recommender systems in the last decade.
                 Essentially, MF attempts to search for low-ranked
                 matrices that can (1) best approximate the known rating
                 scores, and (2) maintain low Frobenius norm for the
                 low-ranked matrices to prevent overfitting. Since the
                 two objectives conflict with each other, the common
                 practice is to assign the relative importance weights
                 as the hyper-parameters to these objectives. The two
                 low-ranked matrices returned by MF are often
                 interpreted as the latent factors of a user and the
                 latent factors of an item that would affect the rating
                 of the user on the item. As a result, it is typical
                 that, in the loss function, we assign a regularization
                 weight $ \lambda_p $ on the norms of the latent factors
                 for all users, and another regularization weight $
                 \lambda_q $ on the norms of the latent factors for all
                 the items. We argue that such a methodology probably
                 over-simplifies the scenario. Alternatively, we
                 probably should assign lower constraints to the latent
                 factors associated with the items or users that reveal
                 more information, and set higher constraints to the
                 others. In this article, we systematically study this
                 topic. We found that such a simple technique can
                 improve the prediction results of the MF-based
                 approaches based on several public datasets.
                 Specifically, we applied the proposed methodology on
                 three baseline models --- SVD, SVD++, and the NMF
                 models. We found that this technique improves the
                 prediction accuracy for all these baseline models.
                 Perhaps more importantly, this technique better
                 predicts the ratings on the long-tail items, i.e., the
                 items that were rated/viewed/purchased by few users.
                 This suggests that this approach may partially remedy
                 the cold-start issue. The proposed method is very
                 general and can be easily applied on various
                 recommendation models, such as Factorization Machines,
                 Field-aware Factorization Machines, Factorizing
                 Personalized Markov Chains, Prod2Vec, Behavior2Vec, and
                 so on. We release the code for reproducibility. We
                 implemented a Python package that integrates the
                 proposed regularization technique with the SVD, SVD++,
                 and the NMF model. The package can be accessed at
                 https://github.com/ncu-dart/rdf.",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
}

@Article{Sahoo:2019:LSO,
  author =       "Doyen Sahoo and Steven C. H. Hoi and Bin Li",
  title =        "Large Scale Online Multiple Kernel Regression with
                 Application to Time-Series Prediction",
  journal =      j-TKDD,
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  year =         "2019",
  month =        jan,
  volume =       "13",
  number =       "1",
  articleno =    "9",
  pages =        "9:1--9:??",
  DOI =          "https://doi.org/10.1145/3299875",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  CODEN =        "????",
  abstract =     "Kernel-based regression represents an important family
                 of learning techniques for solving challenging
                 regression tasks with non-linear patterns. Despite
                 being studied extensively, most of the existing work
                 suffers from two major drawbacks as follows: (i) they
                 are often designed for solving regression tasks in a
                 batch learning setting, making them not only
                 computationally inefficient and but also poorly
                 scalable in real-world applications where data arrives
                 sequentially; and (ii) they usually assume that a fixed
                 kernel function is given prior to the learning task,
                 which could result in poor performance if the chosen
                 kernel is inappropriate. To overcome these drawbacks,
                 this work presents a novel scheme of Online Multiple
                 Kernel Regression (OMKR), which sequentially learns the
                 kernel-based regressor in an online and scalable
                 fashion, and dynamically explore a pool of multiple
                 diverse kernels to avoid suffering from a single fixed
                 poor kernel so as to remedy the drawback of
                 manual/heuristic kernel selection. The OMKR problem is
                 more challenging than regular kernel-based regression
                 tasks since we have to on-the-fly determine both the
                 optimal kernel-based regressor for each individual
                 kernel and the best combination of the multiple kernel
                 regressors. We propose a family of OMKR algorithms for
                 regression and discuss their application to time series
                 prediction tasks including application to AR, ARMA, and
                 ARIMA time series. We develop novel approaches to make
                 OMKR scalable for large datasets, to counter the
                 problems arising from an unbounded number of support
                 vectors. We also explore the effect of kernel
                 combination at prediction level and at the
                 representation level. Finally, we conduct extensive
                 experiments to evaluate the empirical performance on
                 both real-world regression and times series prediction
                 tasks.",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
}

@Article{Barton:2019:CIG,
  author =       "Tomas Barton and Tomas Bruna and Pavel Kordik",
  title =        "Chameleon 2: an Improved Graph-Based Clustering
                 Algorithm",
  journal =      j-TKDD,
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  year =         "2019",
  month =        jan,
  volume =       "13",
  number =       "1",
  articleno =    "10",
  pages =        "10:1--10:??",
  DOI =          "https://doi.org/10.1145/3299876",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  CODEN =        "????",
  abstract =     "Traditional clustering algorithms fail to produce
                 human-like results when confronted with data of
                 variable density, complex distributions, or in the
                 presence of noise. We propose an improved graph-based
                 clustering algorithm called Chameleon 2, which
                 overcomes several drawbacks of state-of-the-art
                 clustering approaches. We modified the internal cluster
                 quality measure and added an extra step to ensure
                 algorithm robustness. Our results reveal a significant
                 positive impact on the clustering quality measured by
                 Normalized Mutual Information on 32 artificial datasets
                 used in the clustering literature. This significant
                 improvement is also confirmed on real-world datasets.
                 The performance of clustering algorithms such as DBSCAN
                 is extremely parameter sensitive, and exhaustive manual
                 parameter tuning is necessary to obtain a meaningful
                 result. All hierarchical clustering methods are very
                 sensitive to cutoff selection, and a human expert is
                 often required to find the true cutoff for each
                 clustering result. We present an automated cutoff
                 selection method that enables the Chameleon 2 algorithm
                 to generate high-quality clustering in autonomous
                 mode.",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
}

@Article{Murai:2019:CDU,
  author =       "Fabricio Murai and Bruno Ribeiro and Don Towsley and
                 Pinghui Wang",
  title =        "Characterizing Directed and Undirected Networks via
                 Multidimensional Walks with Jumps",
  journal =      j-TKDD,
  volume =       "13",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3299877",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Estimating distributions of node characteristics
                 (labels) such as number of connections or citizenship
                 of users in a social network via edge and node sampling
                 is a vital part of the study of complex networks. Due
                 to its low cost, sampling via a random walk (RW) has
                 been proposed as an attractive solution to this task.
                 Most RW methods assume either that the network is
                 undirected or that walkers can traverse edges
                 regardless of their direction. Some RW methods have
                 been designed for directed networks where edges coming
                 into a node are not directly observable. In this work,
                 we propose Directed Unbiased Frontier Sampling (DUFS),
                 a sampling method based on a large number of
                 coordinated walkers, each starting from a node chosen
                 uniformly at random. It applies to directed networks
                 with invisible incoming edges because it constructs, in
                 real time, an undirected graph consistent with the
                 walkers trajectories, and its use of random jumps to
                 prevent walkers from being trapped. DUFS generalizes
                 previous RW methods and is suited for undirected
                 networks and to directed networks regardless of in-edge
                 visibility. We also propose an improved estimator of
                 node label distribution that combines information from
                 initial walker locations with subsequent RW
                 observations. We evaluate DUFS, compare it to other RW
                 methods, investigate the impact of its parameters on
                 estimation accuracy and provide practical guidelines
                 for choosing them. In estimating out-degree
                 distributions, DUFS yields significantly better
                 estimates of the head of the distribution than other
                 methods, while matching or exceeding estimation
                 accuracy of the tail. Last, we show that DUFS
                 outperforms uniform sampling when estimating
                 distributions of node labels of the top 10\% largest
                 degree nodes, even when sampling a node uniformly has
                 the same cost as RW steps.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2019:DAA,
  author =       "Huan Wang and Jia Wu and Wenbin Hu and Xindong Wu",
  title =        "Detecting and Assessing Anomalous Evolutionary
                 Behaviors of Nodes in Evolving Social Networks",
  journal =      j-TKDD,
  volume =       "13",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3299886",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  abstract =     "Based on the performance of entire social networks,
                 anomaly analysis for evolving social networks generally
                 ignores the otherness of the evolutionary behaviors of
                 different nodes, such that it is difficult to precisely
                 identify the anomalous evolutionary behaviors of nodes
                 (AEBN). Assuming that a node's evolutionary behavior
                 that generates and removes edges normally follows
                 stable evolutionary mechanisms, this study focuses on
                 detecting and assessing AEBN, whose evolutionary
                 mechanisms deviate from their past mechanisms, and
                 proposes a link prediction detection (LPD) method and
                 a matrix perturbation assessment (MPA) method. LPD
                 describes a node's evolutionary behavior by fitting its
                 evolutionary mechanism, and designs indexes for edge
                 generation and removal to evaluate the extent to which
                 the evolutionary mechanism of a node's evolutionary
                 behavior can be fitted by a link prediction algorithm.
                 Furthermore, it detects AEBN by quantifying the
                 differences among behavior vectors that characterize
                 the node's evolutionary behaviors in different periods.
                 In addition, MPA considers AEBN as a perturbation of
                 the social network structure, and quantifies the effect
                 of AEBN on the social network structure based on matrix
                 perturbation analysis. Extensive experiments on eight
                 disparate real-world networks demonstrate that
                 analyzing AEBN from the perspective of evolutionary
                 mechanisms is important and beneficial.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{vanLeeuwen:2019:ASI,
  author =       "Matthijs van Leeuwen and Polo Chau and Jilles Vreeken
                 and Dafna Shahaf and Christos Faloutsos",
  title =        "Addendum to the Special Issue on {Interactive Data
                 Exploration and Analytics (TKDD, Vol. 12, Iss. 1):
                 Introduction by the Guest Editors}",
  journal =      j-TKDD,
  volume =       "13",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3298786",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jan 29 17:18:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Katib:2019:FAS,
  author =       "Anas Katib and Praveen Rao and Kobus Barnard and
                 Charles Kamhoua",
  title =        "Fast Approximate Score Computation on Large-Scale
                 Distributed Data for Learning Multinomial {Bayesian}
                 Networks",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "14:1--14:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301304",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301304",
  abstract =     "In this article, we focus on the problem of learning a
                 Bayesian network over distributed data stored in a
                 commodity cluster. Specifically, we address the
                 challenge of computing the scoring function over
                 distributed data in an efficient and scalable manner,
                 which is a fundamental task during learning. While
                 exact score computation can be done using the
                 MapReduce-style computation, our goal is to compute
                 approximate scores much faster with probabilistic error
                 bounds and in a scalable manner. We propose a novel
                 approach, which is designed to achieve the following:
                 (a) decentralized score computation using the principle
                 of gossiping; (b) lower resource consumption via a
                 probabilistic approach for maintaining scores using the
                 properties of a Markov chain; and (c) effective
                 distribution of tasks during score computation (on
                 large datasets) by synergistically combining well-known
                 hashing techniques. We conduct theoretical analysis of
                 our approach in terms of convergence speed of the
                 statistics required for score computation, and memory
                 and network bandwidth consumption. We also discuss how
                 our approach is capable of efficiently recomputing
                 scores when new data are available. We conducted a
                 comprehensive evaluation of our approach and compared
                 with the MapReduce-style computation using datasets of
                 different characteristics on a 16-node cluster. When
                 the MapReduce-style computation provided exact
                 statistics for score computation, it was nearly 10
                 times slower than our approach. Although it ran faster
                 on randomly sampled datasets than on the entire
                 datasets, it performed worse than our approach in terms
                 of accuracy. Our approach achieved high accuracy (below
                 6\% average relative error) in estimating the
                 statistics for approximate score computation on all the
                 tested datasets. In conclusion, it provides a feasible
                 tradeoff between computation time and accuracy for fast
                 approximate score computation on large-scale
                 distributed data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Gao:2019:TEM,
  author =       "Xiaofeng Gao and Zhenhao Cao and Sha Li and Bin Yao
                 and Guihai Chen and Shaojie Tang",
  title =        "Taxonomy and Evaluation for Microblog Popularity
                 Prediction",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "15:1--15:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301303",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301303",
  abstract =     "As social networks become a major source of
                 information, predicting the outcome of information
                 diffusion has appeared intriguing to both researchers
                 and practitioners. By organizing and categorizing the
                 joint efforts of numerous studies on popularity
                 prediction, this article presents a hierarchical
                 taxonomy and helps to establish a systematic overview
                 of popularity prediction methods for microblog.
                 Specifically, we uncover three lines of thoughts: the
                 feature-based approach, time-series modelling, and the
                 collaborative filtering approach and analyse them,
                 respectively. Furthermore, we also categorize
                 prediction methods based on their underlying rationale:
                 whether they attempt to model the motivation of users
                 or monitor the early responses. Finally, we put these
                 prediction methods to test by performing experiments on
                 real-life data collected from popular social networks
                 Twitter and Weibo. We compare the methods in terms of
                 accuracy, efficiency, timeliness, robustness, and bias.
                 As far as we are concerned, there is no precedented
                 survey aimed at microblog popularity prediction at the
                 time of submission. By establishing a taxonomy and
                 evaluation for the first time, we hope to provide an
                 in-depth review of state-of-the-art prediction methods
                 and point out directions for further research. Our
                 evaluations show that time-series modelling has the
                 advantage of high accuracy and the ability to improve
                 over time. The feature-based methods using only
                 temporal features perform nearly as well as using all
                 possible features, producing average results. This
                 suggests that temporal features do have strong
                 predictive power and that power is better exploited
                 with time-series models. On the other hand, this
                 implies that we know little about the future popularity
                 of an item before it is posted, which may be the focus
                 of further research.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yan:2019:RBT,
  author =       "Ruidong Yan and Yi Li and Weili Wu and Deying Li and
                 Yongcai Wang",
  title =        "Rumor Blocking through Online Link Deletion on Social
                 Networks",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "16:1--16:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301302",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301302",
  abstract =     "In recent years, social networks have become important
                 platforms for people to disseminate information.
                 However, we need to take effective measures such as
                 blocking a set of links to control the negative rumors
                 spreading over the network. In this article, we propose
                 a Rumor Spread Minimization (RSM) problem, i.e., we
                 remove an edge set from network such that the rumor
                 spread is minimized. We first prove the objective
                 function of RSM problem is not submodular. Then, we
                 propose both submodular lower-bound and upper-bound of
                 the objective function. Next, we develop a heuristic
                 algorithm to approximate the objective function.
                 Furthermore, we reformulate our objective function as
                 the DS function (the Difference of Submodular
                 functions). Finally, we conduct experiments on
                 real-world datasets to evaluate our proposed method.
                 The experiment results show that the upper and lower
                 bounds are very close, which indicates the good quality
                 of them. And, the proposed method outperforms the
                 comparison methods.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Teinemaa:2019:OOP,
  author =       "Irene Teinemaa and Marlon Dumas and Marcello {La Rosa}
                 and Fabrizio Maria Maggi",
  title =        "Outcome-Oriented Predictive Process Monitoring: Review
                 and Benchmark",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "17:1--17:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3301300",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301300",
  abstract =     "Predictive business process monitoring refers to the
                 act of making predictions about the future state of
                 ongoing cases of a business process, based on their
                 incomplete execution traces and logs of historical
                 (completed) traces. Motivated by the increasingly
                 pervasive availability of fine-grained event data about
                 business process executions, the problem of predictive
                 process monitoring has received substantial attention
                 in the past years. In particular, a considerable number
                 of methods have been put forward to address the problem
                 of outcome-oriented predictive process monitoring,
                 which refers to classifying each ongoing case of a
                 process according to a given set of possible
                 categorical outcomes --- e.g., Will the customer
                 complain or not? Will an order be delivered, canceled,
                 or withdrawn? Unfortunately, different authors have
                 used different datasets, experimental settings,
                 evaluation measures, and baselines to assess their
                 proposals, resulting in poor comparability and an
                 unclear picture of the relative merits and
                 applicability of different methods. To address this
                 gap, this article presents a systematic review and
                 taxonomy of outcome-oriented predictive process
                 monitoring methods, and a comparative experimental
                 evaluation of eleven representative methods using a
                 benchmark covering 24 predictive process monitoring
                 tasks based on nine real-life event logs.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ma:2019:PBD,
  author =       "Liang Ma and Mudhakar Srivatsa and Derya Cansever and
                 Xifeng Yan and Sue Kase and Michelle Vanni",
  title =        "Performance Bounds of Decentralized Search in Expert
                 Networks for Query Answering",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "18:1--18:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3300230",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3300230",
  abstract =     "Expert networks are formed by a group of
                 expert-professionals with different specialties to
                 collaboratively resolve specific queries posted to the
                 network. In such networks, when a query reaches an
                 expert who does not have sufficient expertise, this
                 query needs to be routed to other experts for further
                 processing until it is completely solved; therefore,
                 query answering efficiency is sensitive to the
                 underlying query routing mechanism being used. Among
                 all possible query routing mechanisms, decentralized
                 search, operating purely on each expert's local
                 information without any knowledge of network global
                 structure, represents the most basic and scalable
                 routing mechanism, which is applicable to any network
                 scenarios even in dynamic networks. However, there is
                 still a lack of fundamental understanding of the
                 efficiency of decentralized search in expert networks.
                 In this regard, we investigate decentralized search by
                 quantifying its performance under a variety of network
                 settings. Our key findings reveal the existence of
                 network conditions, under which decentralized search
                 can achieve significantly short query routing paths
                 (i.e., between $ O(\log n) $ and $ O(\log^2 n) $ hops,
                 $n$: total number of experts in the network). Based on
                 such theoretical foundation, we further study how the
                 unique properties of decentralized search in expert
                 networks are related to the anecdotal small-world
                 phenomenon. In addition, we demonstrate that
                 decentralized search is robust against estimation
                 errors introduced by misinterpreting the required
                 expertise levels. The developed performance bounds,
                 confirmed by real datasets, are able to assist in
                 predicting network performance and designing complex
                 expert networks.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jha:2019:DMD,
  author =       "Kishlay Jha and Guangxu Xun and Vishrawas
                 Gopalakrishnan and Aidong Zhang",
  title =        "{DWE-Med}: Dynamic Word Embeddings for Medical
                 Domain",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "19:1--19:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3310254",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3310254",
  abstract =     "Recent advances in unsupervised language processing
                 methods have created an opportunity to exploit massive
                 text corpora for developing high-quality vector space
                 representation (also known as word embeddings) of
                 words. Towards this direction, practitioners have
                 developed and applied several data driven embedding
                 models with quite good rate of success. However, a
                 drawback of these models lies in their premise of
                 static context; wherein, the meaning of a word is
                 assumed to remain the same over the period of time.
                 This is limiting because it is known that the semantic
                 meaning of a concept evolves over time. While such
                 semantic drifts are routinely observed in almost all
                 the domains; their effect is acute in domains such as
                 biomedicine, where the semantic meaning of a concept
                 changes relatively fast. To address this, in this
                 study, we aim to learn temporally aware vector
                 representation of medical concepts from the timestamped
                 text data, and in doing so provide a systematic
                 approach to formalize the problem. More specifically, a
                 dynamic word embedding based model that jointly learns
                 the temporal characteristics of medical concepts and
                 performs across time-alignment is proposed. Apart from
                 capturing the evolutionary characteristics in an
                 optimal manner, the model also factors in the implicit
                 medical properties useful for a variety of bio-medical
                 applications. Empirical studies conducted on two
                 important bio-medical use cases validate the
                 effectiveness of the proposed approach and suggest
                 that the model not only learns quality embeddings but
                 also facilitates intuitive trajectory visualizations.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Cadena:2019:NOP,
  author =       "Jose Cadena and Feng Chen and Anil Vullikanti",
  title =        "Near-Optimal and Practical Algorithms for Graph Scan
                 Statistics with Connectivity Constraints",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "20:1--20:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3309712",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3309712",
  abstract =     "One fundamental task in network analysis is detecting
                 ``hotspots'' or ``anomalies'' in the network; that is,
                 detecting subgraphs where there is significantly more
                 activity than one would expect given historical data or
                 some baseline process. Scan statistics is one popular
                 approach used for anomalous subgraph detection. This
                 methodology involves maximizing a score function over
                 all connected subgraphs, which is a challenging
                 computational problem. A number of heuristics have been
                 proposed for these problems, but they do not provide
                 any quality guarantees. Here, we propose a framework
                 for designing algorithms for optimizing a large class
                 of scan statistics for networks, subject to
                 connectivity constraints. Our algorithms run in time
                 that scales linearly on the size of the graph and
                 depends on a parameter we call the ``effective solution
                 size,'' while providing rigorous approximation
                 guarantees. In contrast, most prior methods have
                 super-linear running times in terms of graph size.
                 Extensive empirical evidence demonstrates the
                 effectiveness and efficiency of our proposed algorithms
                 in comparison with state-of-the-art methods. Our
                 approach improves on the performance relative to all
                 prior methods, giving up to over 25\% increase in the
                 score. Further, our algorithms scale to networks with
                 up to a million nodes, which is 1--2 orders of
                 magnitude larger than all prior applications.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jiang:2019:PFS,
  author =       "Bingbing Jiang and Chang Li and Maarten {De Rijke} and
                 Xin Yao and Huanhuan Chen",
  title =        "Probabilistic Feature Selection and Classification
                 Vector Machine",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "21:1--21:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3309541",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3309541",
  abstract =     "Sparse Bayesian learning is a state-of-the-art
                 supervised learning algorithm that can choose a subset
                 of relevant samples from the input data and make
                 reliable probabilistic predictions. However, in the
                 presence of high-dimensional data with irrelevant
                 features, traditional sparse Bayesian classifiers
                 suffer from performance degradation and low efficiency
                 due to the incapability of eliminating irrelevant
                 features. To tackle this problem, we propose a novel
                 sparse Bayesian embedded feature selection algorithm
                 that adopts truncated Gaussian distributions as both
                 sample and feature priors. The proposed algorithm,
                 called probabilistic feature selection and
                 classification vector machine (PFCVM$_{LP}$) is able to
                 simultaneously select relevant features and samples for
                 classification tasks. In order to derive the analytical
                 solutions, Laplace approximation is applied to compute
                 approximate posteriors and marginal likelihoods.
                 Finally, parameters and hyperparameters are optimized
                 by the type-II maximum likelihood method. Experiments
                 on three datasets validate the performance of
                 PFCVM$_{LP}$ along two dimensions: classification
                 performance and effectiveness for feature selection.
                 Finally, we analyze the generalization performance and
                 derive a generalization error bound for PFCVM$_{LP}$.
                 By tightening the bound, the importance of feature
                 selection is demonstrated.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "21",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2019:FST,
  author =       "Zheng Wang and Xiaojun Ye and Chaokun Wang and Philip
                 S. Yu",
  title =        "Feature Selection via Transferring Knowledge Across
                 Different Classes",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "22:1--22:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314202",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314202",
  abstract =     "The problem of feature selection has attracted
                 considerable research interest in recent years.
                 Supervised information is capable of significantly
                 improving the quality of selected features. However,
                 existing supervised feature selection methods all
                 require that classes in the labeled data (source
                 domain) and unlabeled data (target domain) be
                 identical, which may be too restrictive in many cases.
                 In this article, we consider a more challenging
                 cross-class setting where the classes in these two
                 domains are related but different, which has rarely
                 been studied before. We propose a cross-class knowledge
                 transfer feature selection framework which transfers
                 the cross-class knowledge from the source domain to
                 guide target domain feature selection. Specifically,
                 high-level descriptions, i.e., attributes, are used as
                 the bridge for knowledge transfer. To further improve
                 the quality of the selected features, our framework
                 jointly considers the tasks of cross-class knowledge
                 transfer and feature selection. Experimental results on
                 four benchmark datasets demonstrate the superiority of
                 the proposed method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "22",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Hong:2019:VGM,
  author =       "Junyuan Hong and Yang Li and Huanhuan Chen",
  title =        "Variant {Grassmann} Manifolds: a Representation
                 Augmentation Method for Action Recognition",
  journal =      j-TKDD,
  volume =       "13",
  number =       "2",
  pages =        "23:1--23:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3314203",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3314203",
  abstract =     "In classification tasks, classifiers trained with
                 finite examples might generalize poorly to new data
                 with unknown variance. For this issue, data
                 augmentation is a successful solution where numerous
                 artificial examples are added to training sets. In this
                 article, we focus on the data augmentation for
                 improving the accuracy of action recognition, where
                 action videos are modeled by linear dynamical systems
                 and approximately represented as linear subspaces.
                 These subspace representations lie in a non-Euclidean
                 space, named Grassmann manifold, containing points as
                 orthonormal matrixes. It is our concern that poor
                 generalization may result from the variance of
                 manifolds when data come from different sources or
                 classes. Thus, we introduce infinitely many variant
                 Grassmann manifolds (VGM) subject to a known
                 distribution, then represent each action video as
                 different Grassmann points leading to augmented
                 representations. Furthermore, a prior based on the
                 stability of subspace bases is introduced, so the
                 manifold distribution can be adaptively determined,
                 balancing discrimination and representation.
                 Experimental results of multi-class and multi-source
                 classification show that VGM softmax classifiers
                 achieve lower test error rates compared to methods with
                 a single manifold.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "23",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Guo:2019:LLS,
  author          = {Yumeng Guo and Fulai Chung and Guozheng Li and
                     Jiancong Wang and James C. Gee},
  title           = {Leveraging Label-Specific Discriminant Mapping
                     Features for Multi-Label Learning},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {2},
  articleno       = {24},
  pages           = {24:1--24:??},
  month           = jun,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3319911},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3319911},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {As an important machine learning task, multi-label
                     learning deals with the problem where each sample
                     instance (feature vector) is associated with multiple
                     labels simultaneously. Most existing approaches focus
                     on manipulating the label space, such as exploiting
                     correlations between labels and reducing label space
                     dimension, with identical feature space in the process
                     of classification. One potential drawback of this
                     traditional strategy is that each label might have its
                     own specific characteristics and using identical
                     features for all label cannot lead to optimized
                     performance. In this article, we propose an effective
                     algorithm named LSDM, i.e., leveraging label-specific
                     discriminant mapping features for multi-label learning,
                     to overcome the drawback. LSDM sets diverse ratio
                     parameter values to conduct cluster analysis on the
                     positive and negative instances of identical label. It
                     reconstructs label-specific feature space which
                     includes distance information and spatial topology
                     information. Our experimental results show that
                     combining these two parts of information in the new
                     feature representation can better exploit the
                     clustering results in the learning process. Due to the
                     problem of diverse combinations for identical label, we
                     employ simplified linear discriminant analysis to
                     efficiently excavate optimal one for each label and
                     perform classification by querying the corresponding
                     results. Comparison with the state-of-the-art
                     algorithms on a total of 20 benchmark datasets clearly
                     manifests the competitiveness of LSDM.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:01 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Gan:2019:SPS,
  author          = {Wensheng Gan and Jerry Chun-Wei Lin and Philippe
                     Fournier-Viger and Han-Chieh Chao and Philip S. Yu},
  title           = {A Survey of Parallel Sequential Pattern Mining},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {25},
  pages           = {25:1--25:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3314107},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3314107},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {With the growing popularity of shared resources, large
                     volumes of complex data of different types are
                     collected automatically. Traditional data mining
                     algorithms generally have problems and challenges
                     including huge memory cost, low processing speed, and
                     inadequate hard disk space. As a fundamental task of
                     data mining, sequential pattern mining (SPM) is used in
                     a wide variety of real-life applications. However, it
                     is more complex and challenging than other pattern
                     mining tasks, i.e., frequent itemset mining and
                     association rule mining, and also suffers from the
                     above challenges when handling the large-scale data. To
                     solve these problems, mining sequential patterns in a
                     parallel or distributed computing environment has
                     emerged as an important issue with many applications.
                     In this article, an in-depth survey of the current
                     status of parallel SPM (PSPM) is investigated and
                     provided, including detailed categorization of
                     traditional serial SPM approaches, and state-of-the art
                     PSPM. We review the related work of PSPM in details
                     including partition-based algorithms for PSPM,
                     apriori-based PSPM, pattern-growth-based PSPM, and
                     hybrid algorithms for PSPM, and provide deep
                     description (i.e., characteristics, advantages,
                     disadvantages, and summarization) of these parallel
                     approaches of PSPM. Some advanced topics for PSPM,
                     including parallel quantitative/weighted/utility SPM,
                     PSPM from uncertain data and stream data, hardware
                     acceleration for PSPM, are further reviewed in details.
                     Besides, we review and provide some well-known
                     open-source software of PSPM. Finally, we summarize
                     some challenges and opportunities of PSPM in the big
                     data era.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Mahmoudi:2019:RBO,
  author          = {Amin Mahmoudi and Mohd Ridzwan Yaakub and Azuraliza
                     Abu Bakar},
  title           = {The Relationship between Online Social Network Ties
                     and User Attributes},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {26},
  pages           = {26:1--26:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3314204},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3314204},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {The distance between users has an effect on the
                     formation of social network ties, but it is not the
                     only or even the main factor. Knowing all the features
                     that influence such ties is very important for many
                     related domains such as location-based recommender
                     systems and community and event detection systems for
                     online social networks (OSNs). In recent years,
                     researchers have analyzed the role of user geo-location
                     in OSNs. Researchers have also attempted to determine
                     the probability of friendships being established based
                     on distance, where friendship is not only a function of
                     distance. However, some important features of OSNs
                     remain unknown. In order to comprehensively understand
                     the OSN phenomenon, we also need to analyze users'
                     attributes. Basically, an OSN functions according to
                     four main user properties: user geo-location, user
                     weight, number of user interactions, and user lifespan.
                     The research presented here sought to determine whether
                     the user mobility pattern can be used to predict users'
                     interaction behavior. It also investigated whether, in
                     addition to distance, the number of friends (known as
                     user weight) interferes in social network tie
                     formation. To this end, we analyzed the above-stated
                     features in three large-scale OSNs. We found that
                     regardless of a high degree freedom in user mobility,
                     the fraction of the number of outside activities over
                     the inside activity is a significant fraction that
                     helps us to address the user interaction behavior. To
                     the best of our knowledge, research has not been
                     conducted elsewhere on this issue. We also present a
                     high-resolution formula in order to improve the
                     friendship probability function.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Zhou:2019:MTC,
  author          = {Yao Zhou and Lei Ying and Jingrui He},
  title           = {Multi-task Crowdsourcing via an Optimization
                     Framework},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {27},
  pages           = {27:1--27:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3310227},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3310227},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {The unprecedented amounts of data have catalyzed the
                     trend of combining human insights with machine learning
                     techniques, which facilitate the use of crowdsourcing
                     to enlist label information both effectively and
                     efficiently. One crucial challenge in crowdsourcing is
                     the diverse worker quality, which determines the
                     accuracy of the label information provided by such
                     workers. Motivated by the observations that same set of
                     tasks are typically labeled by the same set of workers,
                     we studied their behaviors across multiple related
                     tasks and proposed an optimization framework for
                     learning from task and worker dual heterogeneity. The
                     proposed method uses a weight tensor to represent the
                     workers' behaviors across multiple tasks, and seeks to
                     find the optimal solution of the tensor by exploiting
                     its structured information. Then, we propose an
                     iterative algorithm to solve the optimization problem
                     and analyze its computational complexity. To infer the
                     true label of an example, we construct a worker
                     ensemble based on the estimated tensor, whose decisions
                     will be weighted using a set of entropy weight. We also
                     prove that the gradient of the most time-consuming
                     updating block is separable with respect to the
                     workers, which leads to a randomized algorithm with
                     faster speed. Moreover, we extend the learning
                     framework to accommodate to the multi-class setting.
                     Finally, we test the performance of our framework on
                     several datasets, and demonstrate its superiority over
                     state-of-the-art techniques.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Zhang:2019:RRH,
  author          = {Xuchao Zhang and Shuo Lei and Liang Zhao and Arnold P.
                     Boedihardjo and Chang-Tien Lu},
  title           = {Robust Regression via Heuristic Corruption
                     Thresholding and Its Adaptive Estimation Variation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {28},
  pages           = {28:1--28:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3314105},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3314105},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {The presence of data noise and corruptions has
                     recently invoked increasing attention on robust
                     least-squares regression (RLSR), which addresses this
                     fundamental problem that learns reliable regression
                     coefficients when response variables can be arbitrarily
                     corrupted. Until now, the following important
                     challenges could not be handled concurrently: (1)
                     rigorous recovery guarantee of regression coefficients,
                     (2) difficulty in estimating the corruption ratio
                     parameter, and (3) scaling to massive datasets. This
                     article proposes a novel Robust regression algorithm
                     via Heuristic Corruption Thresholding (RHCT) that
                     concurrently addresses all the above challenges.
                     Specifically, the algorithm alternately optimizes the
                     regression coefficients and estimates the optimal
                     uncorrupted set via heuristic thresholding without a
                     pre-defined corruption ratio parameter until its
                     convergence. Moreover, to improve the efficiency of
                     corruption estimation in large-scale data, a Robust
                     regression algorithm via Adaptive Corruption
                     Thresholding (RACT) is proposed to determine the size
                     of the uncorrupted set in a novel adaptive search
                     method without iterating data samples exhaustively. In
                     addition, we prove that our algorithms benefit from
                     strong guarantees analogous to those of
                     state-of-the-art methods in terms of convergence rates
                     and recovery guarantees. Extensive experiments
                     demonstrate that the effectiveness of our new methods
                     is superior to that of existing methods in the recovery
                     of both regression coefficients and uncorrupted sets,
                     with very competitive efficiency.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Wang:2019:IDP,
  author          = {Zhitao Wang and Chengyao Chen and Wenjie Li},
  title           = {Information Diffusion Prediction with Network
                     Regularized Role-based User Representation Learning},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {29},
  pages           = {29:1--29:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3314106},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3314106},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {In this article, we aim at developing a user
                     representation learning model to solve the information
                     diffusion prediction problem in social media. The main
                     idea is to project the diffusion users into a
                     continuous latent space as the role-based (sender and
                     receiver) representations, which capture unique
                     diffusion characteristics of users. The model learns
                     the role-based representations based on a cascade
                     modeling objective that aims at maximizing the
                     likelihood of observed cascades, and employs the matrix
                     factorization objective of reconstructing structural
                     proximities as a regularization on representations. By
                     jointly embedding the information of cascades and
                     network, the learned representations are robust on
                     different diffusion data. We evaluate the proposed
                     model on three real-world datasets. The experimental
                     results demonstrate the better performance of the
                     proposed model than state-of-the-art diffusion
                     embedding and network embedding models and other
                     popular graph-based methods.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Ju:2019:TRB,
  author          = {Fujiao Ju and Yanfeng Sun and Junbin Gao and Michael
                     Antolovich and Junliang Dong and Baocai Yin},
  title           = {Tensorizing Restricted {Boltzmann} Machine},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {30},
  pages           = {30:1--30:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3321517},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3321517},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Restricted Boltzmann machine (RBM) is a famous model
                     for feature extraction and can be used as an
                     initializer for neural networks. When applying the
                     classic RBM to multidimensional data such as 2D/3D
                     tensors, one needs to vectorize such as high-order
                     data. Vectorizing will result in dimensional disaster
                     and valuable spatial information loss. As RBM is a
                     model with fully connected layers, it requires a large
                     amount of memory. Therefore, it is difficult to use RBM
                     with high-order data on low-end devices. In this
                     article, to utilize classic RBM on tensorial data
                     directly, we propose a new tensorial RBM model
                     parameterized by the tensor train format (TTRBM). In
                     this model, both visible and hidden variables are in
                     tensorial form, which are connected by a parameter
                     matrix in tensor train format. The biggest advantage of
                     the proposed model is that TTRBM can obtain comparable
                     performance compared with the classic RBM with much
                     fewer model parameters and faster training process. To
                     demonstrate the advantages of TTRBM, we conduct three
                     real-world applications, face reconstruction,
                     handwritten digit recognition, and image
                     super-resolution in the experiments.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Liu:2019:LKI,
  author          = {Chenyang Liu and Jian Cao and Shanshan Feng},
  title           = {Leveraging Kernel-Incorporated Matrix Factorization
                     for App Recommendation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {31},
  pages           = {31:1--31:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3320482},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3320482},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {The ever-increasing number of smartphone applications
                     (apps) available on different app markets poses a
                     challenge for personalized app recommendation.
                     Conventional collaborative filtering-based
                     recommendation methods suffer from sparse and binary
                     user-app implicit feedback, which results in poor
                     performance in discriminating user-app preferences. In
                     this article, we first propose two kernel incorporated
                     probabilistic matrix factorization models, which
                     introduce app-categorical information to constrain the
                     user and app latent features to be similar to their
                     neighbors in the latent space. The two models are
                     solved by Stochastic Gradient Descent with a
                     user-oriented negative sampling scheme. To further
                     improve the recommendation performance, we construct
                     pseudo user-app ratings based on user-app usage
                     information, and propose a novel kernelized
                     non-negative matrix factorization by incorporating
                     non-negative constraints on latent factors to predict
                     user-app preferences. This model also leverages
                     user--user and app--app similarities with regard to
                     app-categorical information to mine the latent
                     geometric structure in the pseudo-rating space.
                     Adopting the Karush--Kuhn--Tucker conditions, a
                     Multiplicative Updating Rules based optimization is
                     proposed for model learning, and the convergence is
                     proved by introducing an auxiliary function. The
                     experimental results on a real user-app installation
                     usage dataset show the comparable performance of our
                     models with the state-of-the-art baselines in terms of
                     two ranking-oriented evaluation metrics.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Dehghan:2019:TDE,
  author          = {Mahdi Dehghan and Ahmad Ali Abin},
  title           = {Translations Diversification for Expert Finding: a
                     Novel Clustering-based Approach},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {32},
  pages           = {32:1--32:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3320489},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3320489},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Expert finding is the task of retrieving and ranking
                     knowledgeable people in the subject of user's query. It
                     is a well-studied problem that has attracted the
                     attention of many researchers. The most important
                     challenge in expert finding is to determine the
                     similarity between query words and documents authored
                     by candidate experts. One of the most important
                     challenges in Information Retrieval (IR) community is
                     the issue of vocabulary gap between queries and
                     documents. In this study, a translation model based on
                     words clustering in two query and co-occurrence spaces
                     is proposed to overcome this problem. First, the words
                     that are semantically close, are clustered in a query
                     space and then each cluster in this space are clustered
                     again in a co-occurrence space. Representatives of each
                     cluster in the co-occurrence space are considered as a
                     diverse subset of the parent cluster. By this method,
                     the query translations are expected to be diversified
                     in the query space. Next, a probabilistic model, that
                     is based on the belonging degree of word to cluster and
                     similarity of cluster to query in the query space, is
                     used to consider the problem of vocabulary gap.
                     Finally, the corresponding translations to each query
                     are used in conjunction with a combination model for
                     expert finding. Experiments on Stack Overflow dataset
                     show the effectiveness of the proposed method for
                     expert finding.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Iqbal:2019:BPE,
  author          = {Mohsin Iqbal and Asim Karim and Faisal Kamiran},
  title           = {Balancing Prediction Errors for Robust Sentiment
                     Classification},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {33},
  pages           = {33:1--33:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3328795},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3328795},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Sentiment classification is a popular text mining task
                     in which textual content (e.g., a message) is assigned
                     a polarity label (typically positive or negative)
                     reflecting the sentiment expressed in it. Sentiment
                     classification is used widely in applications like
                     customer feedback analysis where robustness and
                     correctness of results are critical. In this article,
                     we highlight that prediction accuracy alone is not
                     sufficient for assessing the performance of a sentiment
                     classifier; it is also important that the classifier is
                     not biased toward positive or negative polarity, thus
                     distorting the distribution of positive and negative
                     messages in the predictions. We propose a measure,
                     called Polarity Bias Rate, for quantifying this bias in
                     a sentiment classifier. Second, we present two methods
                     for removing this bias in the predictions of
                     unsupervised and supervised sentiment classifiers. Our
                     first method, called Bias-Aware Thresholding (BAT),
                     shifts the decision boundary to control the bias in the
                     predictions. Motivated from cost-sensitive learning,
                     BAT is easily applicable to both lexicon-based
                     unsupervised and supervised classifiers. Our second
                     method, called Balanced Logistic Regression (BLR)
                     introduces a bias-remover constraint into the standard
                     logistic regression model. BLR is an automatic
                     bias-free supervised sentiment classifier. We evaluate
                     our methods extensively on seven real-world datasets.
                     The experiments involve two lexicon-based and two
                     supervised sentiment classifiers and include evaluation
                     on multiple train-test data sizes. The results show
                     that bias is controlled effectively in predictions.
                     Furthermore, prediction accuracy is also increased in
                     many cases, thus enhancing the robustness of sentiment
                     classification.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Zhang:2019:ICS,
  author          = {Mingyue Zhang and Xuan Wei and Xunhua Guo and Guoqing
                     Chen and Qiang Wei},
  title           = {Identifying Complements and Substitutes of Products: a
                     Neural Network Framework Based on Product Embedding},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {13},
  number          = {3},
  articleno       = {34},
  pages           = {34:1--34:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  DOI             = {https://doi.org/10.1145/3320277},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3320277},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
  abstract        = {Complements and substitutes are two typical product
                     relationships that deserve consideration in online
                     product recommendation. One of the key objectives of
                     recommender systems is to promote cross-selling, which
                     heavily relies on recommending the appropriate type of
                     products in specific scenarios. Research on consumer
                     behavior has shown that consumers usually prefer
                     substitutes in the browsing stage whereas complements
                     in the purchasing stage. Thus, it is of great
                     importance to identify the complementary and
                     substitutable relationships between products. In this
                     article, we design a neural network based framework
                     that integrates the textual content and non-textual
                     information of online reviews to mine product
                     relationships. For the textual content, we utilize
                     methods such as LDA topic modeling to represent
                     products in a succinct form called ``embedding.'' To
                     capture the semantics of complementary and
                     substitutable relationships, we design a modeling
                     process that transfers the product embeddings into
                     semantic features and incorporates additional
                     non-textual factors of product reviews. Extensive
                     experiments are conducted to verify the effectiveness
                     of the proposed product relationship mining model. The
                     advantages and robustness of our model are discussed
                     from various perspectives.},
  acknowledgement = ack-nhfb,
  bibdate         = {Wed Dec 18 14:31:02 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Huang:2019:RNC,
  author = {Yourong Huang and Zhu Xiao and Xiaoyou Yu and Dong
    Wang and Vincent Havyarimana and Jing Bai},
  title = {Road Network Construction with Complex Intersections
    Based on Sparsely Sampled Private Car Trajectory Data},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {13},
  number = {3},
  articleno = {35},
  pages = {35:1--35:??},
  month = jul,
  year = {2019},
  DOI = {https://doi.org/10.1145/3326060},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3326060},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  CODEN = {????},
  abstract = {A road network is a critical aspect of both urban
    planning and route recommendation. This article
    proposes an efficient approach to build a fine-grained
    road network based on sparsely sampled private car
    trajectory data under complex urban environment. In
    order to resolve difficulties introduced by low
    sampling rate trajectory data, we concentrate sample
    points around intersections by utilizing the turning
    characteristics from the large-scale trajectory data to
    ensure the accuracy of the detection of intersections
    and road segments. In front of complex road networks
    including many complex intersections, such as the
    overpasses and underpasses, we first layer
    intersections into major and minor one, and then
    propose a simplified representation of intersections
    and corresponding computable model based on the
    features of roads, which can significantly improve the
    accuracy of detected road networks, especially for the
    complex intersections. In order to construct
    fine-grained road networks, we distinguish various
    types of intersections using direction information and
    detected turning limit. To the best of our knowledge,
    our road network building method is the first time to
    give fine-grained road networks based on low-sampling
    rate private car trajectory data, especially able to
    infer the location of complex intersections and its
    connections to other intersections. Last but not the
    least, we propose an effective parameter selection
    process for the Density-Based Spatial Clustering of
    Applications with Noise based clustering algorithm,
    which is used to implement the reliable intersection
    detection. Extensive evaluations are conducted based on
    a real-world trajectory dataset from 1,345 private cars
    in Futian district, Shenzhen city of China. The results
    demonstrate the effectiveness of the proposed method.
    The constructed road network matches close to the one
    from a public editing map OpenStreetMap, especially the
    location of the road intersections and road segments,
    which achieves 92.2\% intersections within 20m and
    91.6\% road segments within 8m.},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Dornaika:2019:ATP,
  author = {Fadi Dornaika},
  title = {Active Two Phase Collaborative Representation
    Classifier},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {13},
  number = {4},
  articleno = {36},
  pages = {36:1--36:??},
  month = aug,
  year = {2019},
  DOI = {https://doi.org/10.1145/3326919},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3326919},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  CODEN = {????},
  abstract = {The Sparse Representation Classifier, the
    Collaborative Representation Classifier (CRC), and the
    Two Phase Test Sample Sparse Representation (TPTSSR)
    classifier were introduced in recent times. All these
    frameworks are supervised and passive in the sense that
    they cannot benefit from unlabeled data samples. In
    this paper, inspired by active learning paradigms, we
    introduce an active CRC that can be used by these
    frameworks. More precisely, we are interested in the
    TPTSSR framework due to its good performance and its
    reasonable computational cost. Our proposed Active Two
    Phase Collaborative Representation Classifier (ATPCRC)
    starts by predicting the label of the available
    unlabeled samples. At testing stage, two coding
    processes are carried out separately on the set of
    originally labeled samples and the whole set (original
    and predicted label). The two types of class-wise
    reconstruction errors are blended in order to decide
    the class of any test image. Experiments conducted on
    four public image datasets show that the proposed
    ATPCRC can outperform the classic TPTSSR as well as
    many state-of-the-art methods that exploit label and
    unlabeled data samples.},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Yang:2019:TSV,
  author =       "Wenmian Yang and Kun Wang and Na Ruan and Wenyuan Gao
                 and Weijia Jia and Wei Zhao and Nan Liu and Yunyong
                 Zhang",
  title =        "Time-Sync Video Tag Extraction Using Semantic
                 Association Graph",
  journal =      j-TKDD,
  volume =       "13",
  number =       "4",
  pages =        "37:1--37:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3332932",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:02 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3332932",
  abstract =     "Time-sync comments (TSCs) reveal a new way of
                 extracting the online video tags. However, such TSCs
                 have lots of noises due to users' diverse comments,
                 introducing great challenges for accurate and fast
                 video tag extractions. In this article, we propose an
                 unsupervised video tag extraction algorithm named
                 Semantic Weight-Inverse Document Frequency (SW-IDF).
                 Specifically, we first generate corresponding semantic
                 association graph (SAG) using semantic similarities and
                 timestamps of the TSCs. Second, we propose two graph
                 cluster algorithms, i.e., dialogue-based algorithm and
                 topic center-based algorithm, to deal with the videos
                 with different density of comments. Third, we design a
                 graph iteration algorithm to assign the weight to each
                 comment based on the degrees of the clustered
                 subgraphs, which can differentiate the meaningful
                 comments from the noises. Finally, we gain the weight
                 of each word by combining Semantic Weight (SW) and
                 Inverse Document Frequency (IDF). In this way, the
                 video tags are extracted automatically in an
                 unsupervised way. Extensive experiments have shown that
                 SW-IDF (dialogue-based algorithm) achieves 0.4210
                 F1-score and 0.4932 MAP (Mean Average Precision) in
                 high-density comments, 0.4267 F1-score and 0.3623 MAP
                 in low-density comments; while SW-IDF (topic
                 center-based algorithm) achieves 0.4444 F1-score and
                 0.5122 MAP in high-density comments, 0.4207 F1-score
                 and 0.3522 MAP in low-density comments. It has a better
                 performance than the state-of-the-art unsupervised
                 algorithms in both F1-score and MAP.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "37",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  remark =       "First author's given name corrected from ``Wenmain''
                 to ``Wenmian''; verify against the publisher record.",
}

@Article{Amelkin:2019:DMA,
  author =       "Victor Amelkin and Petko Bogdanov and Ambuj K. Singh",
  title =        "A Distance Measure for the Analysis of Polar Opinion
                 Dynamics in Social Networks",
  journal =      j-TKDD,
  volume =       "13",
  number =       "4",
  pages =        "38:1--38:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3332168",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:02 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3332168",
  abstract =     "Analysis of opinion dynamics in social networks plays
                 an important role in today's life. For predicting
                 users' political preference, it is particularly
                 important to be able to analyze the dynamics of
                 competing polar opinions, such as pro-Democrat vs.
                 pro-Republican. While observing the evolution of polar
                 opinions in a social network over time, can we tell
                 when the network evolved abnormally? Furthermore, can
                 we predict how the opinions of the users will change in
                 the future? To answer such questions, it is
                 insufficient to study individual user behavior, since
                 opinions can spread beyond users' ego-networks.
                 Instead, we need to consider the opinion dynamics of
                 all users simultaneously and capture the connection
                 between the individuals' behavior and the global
                 evolution pattern of the social network. In this work,
                 we introduce the Social Network Distance (SND)---a
                 distance measure that quantifies the likelihood of
                 evolution of one snapshot of a social network into
                 another snapshot under a chosen model of polar opinion
                 dynamics. SND has a rich semantics of a transportation
                 problem, yet, is computable in time linear in the
                 number of users and, as such, is applicable to
                 large-scale online social networks. In our experiments
                 with synthetic and Twitter data, we demonstrate the
                 utility of our distance measure for anomalous event
                 detection. It achieves a true positive rate of 0.83,
                 twice as high as that of alternatives. The same
                 predictions presented in precision-recall space show
                 that SND retains perfect precision for recall up to
                 0.2. Its precision then decreases while maintaining
                 more than 2-fold improvement over alternatives for
                 recall up to 0.95. When used for opinion prediction in
                 Twitter data, SND's accuracy is 75.6\%, which is 7.5\%
                 higher than that of the next best method.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2019:MCP,
  author = {Haoran Chen and Jinghua Li and Junbin Gao and Yanfeng
    Sun and Yongli Hu and Baocai Yin},
  title = {Maximally Correlated Principal Component Analysis
    Based on Deep Parameterization Learning},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {13},
  number = {4},
  articleno = {39},
  pages = {39:1--39:??},
  month = aug,
  year = {2019},
  DOI = {https://doi.org/10.1145/3332183},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3332183},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  CODEN = {????},
  abstract = {Dimensionality reduction is widely used to deal with
    high-dimensional data. As a famous dimensionality
    reduction method, principal component analysis (PCA)
    aiming at finding the low dimension feature of original
    data has made great successes, and many improved PCA
    algorithms have been proposed. However, most algorithms
    based on PCA only consider the linear correlation of
    data features. In this article, we propose a novel
    dimensionality reduction model called maximally
    correlated PCA based on deep parameterization learning
    (MCPCADP), which takes nonlinear correlation into
    account in the deep parameterization framework for the
    purpose of dimensionality reduction. The new model
    explores nonlinear correlation by maximizing Ky-Fan
    norm of the covariance matrix of nonlinearly mapped
    data features. A new BP algorithm for model
    optimization is derived. In order to assess the
    proposed method, we conduct experiments on both a
    synthetic database and several real-world databases.
    The experimental results demonstrate that the proposed
    algorithm is comparable to several widely used
    algorithms.},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Gallardo:2019:IVE,
  author =       "Fern{\'a}ndez Gallardo, Laura and Sanchez-Iborra,
                 Ramon",
  title =        "On the Impact of Voice Encoding and Transmission on
                 the Predictions of Speaker Warmth and Attractiveness",
  journal =      j-TKDD,
  volume =       "13",
  number =       "4",
  pages =        "40:1--40:??",
  month =        aug,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3332146",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:02 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3332146",
  abstract =     "Modern human-computer interaction systems may not only
                 be based on interpreting natural language but also on
                 detecting speaker interpersonal characteristics in
                 order to determine dialog strategies. This may be of
                 high interest in different fields such as telephone
                 marketing or automatic voice-based interactive
                 services. However, when such systems encounter signals
                 transmitted over a communication network instead of
                 clean speech, e.g., in call centers, the speaker
                 characterization accuracy might be impaired by the
                 degradations caused in the speech signal by the
                 encoding and communication processes. This article
                 addresses a binary classification of high versus low
                 warm--attractive speakers over different channel and
                 encoding conditions. The ground truth is derived from
                 ratings given to clean speech extracted from an
                 extensive subjective test. Our results show that, under
                 the considered conditions, the AMR-WB+ codec permits
                 good levels of classification accuracy, comparable to
                 the classification with clean, non-degraded speech.
                 This is especially notable for the case of a Random
                 Forest-based classifier, which presents the best
                 performance among the set of evaluated algorithms. The
                 impact of different packet loss rates has been
                 examined, whereas jitter effects have been found to be
                 negligible.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "40",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
  remark =       "The first author's surname is the compound
                 ``Fern{\'a}ndez Gallardo''; the author field uses the
                 ``Last, First'' form so that BibTeX does not parse
                 ``Fern{\'a}ndez'' as a given name. The citation key is
                 kept unchanged for compatibility.",
}

@Article{Comito:2019:BED,
  author = {Carmela Comito and Agostino Forestiero and Clara
    Pizzuti},
  title = {Bursty Event Detection in {Twitter} Streams},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {13},
  number = {4},
  articleno = {41},
  pages = {41:1--41:??},
  month = aug,
  year = {2019},
  DOI = {https://doi.org/10.1145/3332185},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3332185},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  CODEN = {????},
  abstract = {Social media, in recent years, have become an
    invaluable source of information for both public and
    private organizations to enhance the comprehension of
    people interests and the onset of new events. Twitter,
    especially, allows a fast spread of news and events
    happening real time that can contribute to situation
    awareness during emergency situations, but also to
    understand trending topics of a period. The article
    proposes an online algorithm that incrementally groups
    tweet streams into clusters. The approach summarizes
    the examined tweets into the cluster centroid by
    maintaining a number of textual and temporal features
    that allow the method to effectively discover groups of
    interest on particular themes. Experiments on messages
    posted by users addressing different issues, and a
    comparison with state-of-the-art approaches show that
    the method is capable to detect discussions regarding
    topics of interest, but also to distinguish bursty
    events revealed by a sudden spreading of attention on
    messages published by users.},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Qiang:2019:HLT,
  author = {Jipeng Qiang and Ping Chen and Wei Ding and Tong Wang
    and Fei Xie and Xindong Wu},
  title = {Heterogeneous-Length Text Topic Modeling for
    Reader-Aware Multi-Document Summarization},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {13},
  number = {4},
  articleno = {42},
  pages = {42:1--42:??},
  month = aug,
  year = {2019},
  DOI = {https://doi.org/10.1145/3333030},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3333030},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  CODEN = {????},
  abstract = {More and more user comments like Tweets are available,
    which often contain user concerns. In order to meet the
    demands of users, a good summary generating from
    multiple documents should consider reader interests as
    reflected in reader comments. In this article, we focus
    on how to generate a summary from multi-document
    documents by considering reader comments, named as
    reader-aware multi-document summarization (RA-MDS). We
    present an innovative topic-based method for RA-MDA,
    which exploits latent topics to obtain the most salient
    and lessen redundancy summary from multiple documents.
    Since finding latent topics for RA-MDS is a crucial
    step, we also present a Heterogeneous-length Text Topic
    Modeling (HTTM) to extract topics from the corpus that
    includes both news reports and user comments, denoted
    as heterogeneous-length texts. In this case, the latent
    topics extract by HTTM cover not only important aspects
    of the event, but also aspects that attract reader
    interests. Comparisons on summary benchmark datasets
    also confirm that the proposed RA-MDS method is
    effective in improving the quality of extracted
    summaries. In addition, experimental results
    demonstrate that the proposed topic modeling method
    outperforms existing topic modeling algorithms.},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Li:2019:HDE,
  author = {Qingyang Li and Zhiwen Yu and Bin Guo and Huang Xu and
    Xinjiang Lu},
  title = {Housing Demand Estimation Based on Express Delivery
    Data},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {13},
  number = {4},
  articleno = {43},
  pages = {43:1--43:??},
  month = aug,
  year = {2019},
  DOI = {https://doi.org/10.1145/3332522},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3332522},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  CODEN = {????},
  abstract = {Housing demand estimation is an important topic in the
    field of economic research. It is beneficial and
    helpful for various applications including real estate
    market regulation and urban planning, and therefore is
    crucial for both real estate investors and government
    administrators. Meanwhile, given the rapid development
    of the express industry, abundant useful information is
    embedded in express delivery records, which is helpful
    for researchers in profiling urban life patterns. The
    express delivery behaviors of the residents in a
    residential community can reflect the housing demand to
    some extent. Although housing demand has been analyzed
    in previous studies, its estimation has not been very
    good, and the subject remains under explored. To this
    end, in this article, we propose a systematic housing
    demand estimation method based on express delivery
    data. First, the express delivery records are
    aggregated on the community scale with the use of
    clustering methods, and the missing values in the
    records are completed. Then, various features are
    extracted from a less sparse dataset considering both
    the probability of residential mobility and the
    attractiveness of residential communities. In addition,
    given that the correlations between different districts
    can influence the performances of the inference model,
    the commonalities and differences of different
    districts are considered. After obtaining the features
    and correlations between different districts being
    obtained, the housing demand is estimated by using a
    multi-task learning method based on neural networks.
    The experimental results for real-world data show that
    the proposed model is effective at estimating the
    housing demand at the residential community level.},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Sajadmanesh:2019:CTR,
  author = {Sina Sajadmanesh and Sogol Bazargani and Jiawei Zhang
    and Hamid R. Rabiee},
  title = {Continuous-Time Relationship Prediction in Dynamic
    Heterogeneous Information Networks},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {13},
  number = {4},
  articleno = {44},
  pages = {44:1--44:??},
  month = aug,
  year = {2019},
  DOI = {https://doi.org/10.1145/3333028},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3333028},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  CODEN = {????},
  abstract = {Online social networks, World Wide Web, media, and
    technological networks, and other types of so-called
    information networks are ubiquitous nowadays. These
    information networks are inherently heterogeneous and
    dynamic. They are heterogeneous as they consist of
    multi-typed objects and relations, and they are dynamic
    as they are constantly evolving over time. One of the
    challenging issues in such heterogeneous and dynamic
    environments is to forecast those relationships in the
    network that will appear in the future. In this
    article, we try to solve the problem of continuous-time
    relationship prediction in dynamic and heterogeneous
    information networks. This implies predicting the time
    it takes for a relationship to appear in the future,
    given its features that have been extracted by
    considering both heterogeneity and temporal dynamics of
    the underlying network. To this end, we first introduce
    a feature extraction framework that combines the power
    of meta-path-based modeling and recurrent neural
    networks to effectively extract features suitable for
    relationship prediction regarding heterogeneity and
    dynamicity of the networks. Next, we propose a
    supervised non-parametric approach, called
    Non-Parametric Generalized Linear Model (Np-Glm), which
    infers the hidden underlying probability distribution
    of the relationship building time given its features.
    We then present a learning algorithm to train Np-Glm
    and an inference method to answer time-related queries.
    Extensive experiments conducted on synthetic data and
    three real-world datasets, namely Delicious, MovieLens,
    and DBLP, demonstrate the effectiveness of Np-Glm in
    solving continuous-time relationship prediction problem
    vis-{\`a}-vis competitive baselines.},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Ben-Gal:2019:CUT,
  author = {Irad Ben-Gal and Shahar Weinstock and Gonen Singer and
    Nicholas Bambos},
  title = {Clustering Users by Their Mobility Behavioral
    Patterns},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  volume = {13},
  number = {4},
  articleno = {45},
  pages = {45:1--45:??},
  month = aug,
  year = {2019},
  DOI = {https://doi.org/10.1145/3322126},
  URL = {https://dl.acm.org/ft_gateway.cfm?id=3322126},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  CODEN = {????},
  abstract = {The immense stream of data from mobile devices during
    recent years enables one to learn more about human
    behavior and provide mobile phone users with
    personalized services. In this work, we identify
    clusters of users who share similar mobility behavioral
    patterns. We analyze trajectories of semantic locations
    to find users who have similar mobility ``lifestyle,''
    even when they live in different areas. For this task,
    we propose a new grouping scheme that is called
    Lifestyle-Based Clustering (LBC). We represent the
    mobility movement of each user by a Markov model and
    calculate the Jensen-Shannon distances among pairs of
    users. The pairwise distances are represented by a
    similarity matrix, which is used for the clustering. To
    validate the unsupervised clustering task, we develop
    an entropy-based clustering measure, namely, an index
    that measures the homogeneity of mobility patterns
    within clusters of users. The analysis is validated on
    a real-world dataset that contains location-movements
    of 50,000 cellular phone users that were analyzed over
    a two-month period.},
  bibdate = {Wed Dec 18 14:31:02 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Xu:2019:FGA,
  author =       {Yanan Xu and Yanmin Zhu and Yanyan Shen and Jiadi Yu},
  title =        {Fine-Grained Air Quality Inference with Remote Sensing Data
                 and Ubiquitous Urban Data},
  journal =      j-TKDD,
  volume =       {13},
  number =       {5},
  pages =        {46:1--46:??},
  month =        oct,
  year =         {2019},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3340847},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Wed Dec 18 14:31:02 MST 2019},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/ft_gateway.cfm?id=3340847},
  abstract =     {Air quality has gained much attention in recent years and is
                 of great importance to protecting people's health. Due to the
                 influence of multiple factors, the limited air quality
                 monitoring stations deployed in cities are unable to provide
                 fine-grained air quality information. One cost-effective way
                 is to infer air quality with records from existing monitoring
                 stations. However, the severe data sparsity problem (e.g.,
                 only 0.2\% data are known) leads to the failure of most
                 inference methods. We observe that remote sensing data are of
                 high quality and have a strong correlation with the air
                 quality. Therefore, we propose to integrate remote sensing
                 data and ubiquitous urban data for the air quality inference.
                 But there are two main challenges, i.e., data heterogeneity
                 and incompleteness of the remote sensing data. To address the
                 challenges, we propose a two-stage approach. In the first
                 stage, we infer and predict air quality conditions of some
                 places leveraging the remote sensing data and meteorological
                 data with two proposed ANN-based methods, respectively. This
                 stage significantly alleviates the data sparsity problem. In
                 the second stage, the records and estimated air quality data
                 are put in a tensor. A tensor decomposition method is applied
                 to complete the tensor. The features extracted from urban
                 data are classified into the spatial features (i.e., road
                 features and POI features) and the temporal features (i.e.,
                 meteorological features) as the constraints to further
                 address the data sparsity problem. In addition, an iterative
                 training framework is proposed to improve the inference
                 performance. Experiments on a real-world dataset show that
                 our approach outperforms state-of-the-art methods, such as
                 U-Air.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {46},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Zhou:2019:PMM,
  author =       {Xiren Zhou and Huanhuan Chen and Jinlong Li},
  title =        {Probabilistic Mixture Model for Mapping the Underground
                 Pipes},
  journal =      j-TKDD,
  volume =       {13},
  number =       {5},
  pages =        {47:1--47:??},
  month =        oct,
  year =         {2019},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3344721},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Wed Dec 18 14:31:02 MST 2019},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/ft_gateway.cfm?id=3344721},
  abstract =     {Buried pipes beneath our city are blood vessels that feed
                 human civilization through the supply of water, gas,
                 electricity, and so on, and mapping the buried pipes has long
                 been addressed as an issue. In this article, a suitable
                 coordinate of the detected area is established, the noisy
                 Ground Penetrating Radar (GPR) and Global Positioning System
                 (GPS) data are analyzed and normalized, and the pipeline is
                 described mathematically. Based on these, the Probabilistic
                 Mixture Model is proposed to map the buried pipes, which
                 takes discrete noisy GPR and GPS data as the input and the
                 accurate pipe locations and directions as the output. The
                 proposed model consists of the Preprocessing, the Pipe
                 Fitting algorithm, the Classification Fitting Expectation
                 Maximization (CFEM) algorithm, and the Angle-limited Hough
                 (Al-Hough) transform. The direction information of the
                 detecting point is added into the measuring of the distance
                 from the point to nearby pipelines, to handle some areas
                 where the pipes are intersected or difficult to classify. The
                 Expectation Maximization (EM) algorithm is upgraded to CFEM
                 algorithm that is able to classify detecting points into
                 different classes, and connect and fit multiple points in
                 each class to get accurate pipeline locations and directions,
                 and the Al-Hough transform provides reliable initializations
                 for CFEM, to some extent, ensuring the convergence of the
                 proposed model. The experimental results on the simulated and
                 real-world datasets demonstrate the effectiveness of the
                 proposed model.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {47},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Jiang:2019:BMS,
  author =       {Fei Jiang and Guosheng Yin and Francesca Dominici},
  title =        {{Bayesian} Model Selection Approach to Multiple Change-Points
                 Detection with Non-Local Prior Distributions},
  journal =      j-TKDD,
  volume =       {13},
  number =       {5},
  pages =        {48:1--48:??},
  month =        oct,
  year =         {2019},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3340804},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Wed Dec 18 14:31:02 MST 2019},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/ft_gateway.cfm?id=3340804},
  abstract =     {We propose a Bayesian model selection (BMS) boundary
                 detection procedure using non-local prior distributions for a
                 sequence of data with multiple systematic mean changes. By
                 using the non-local priors in the BMS framework, the BMS
                 method can effectively suppress the non-boundary spike points
                 with large instantaneous changes. Further, we speedup the
                 algorithm by reducing the multiple change points to a series
                 of single change point detection problems. We establish the
                 consistency of the estimated number and locations of the
                 change points under various prior distributions. From both
                 theoretical and numerical perspectives, we show that the
                 non-local inverse moment prior leads to the fastest
                 convergence rate in identifying the true change points on the
                 boundaries. Extensive simulation studies are conducted to
                 compare the BMS with existing methods, and our method is
                 illustrated with application to the magnetic resonance
                 imaging guided radiation therapy data.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {48},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2019:RTE,
  author =       {Yun Wang and Guojie Song and Lun Du and Zhicong Lu},
  title =        {Real-Time Estimation of the Urban Air Quality with Mobile
                 Sensor System},
  journal =      j-TKDD,
  volume =       {13},
  number =       {5},
  pages =        {49:1--49:??},
  month =        oct,
  year =         {2019},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3356584},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Wed Dec 18 14:31:02 MST 2019},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/ft_gateway.cfm?id=3356584},
  abstract =     {Recently, real-time air quality estimation has attracted
                 more and more attention from all over the world, which is
                 close to our daily life. With the prevalence of mobile
                 sensors, there is an emerging way to monitor the air quality
                 with mobile sensors on vehicles. Compared with traditional
                 expensive monitor stations, mobile sensors are cheaper and
                 more abundant, but observations from these sensors have
                 unstable spatial and temporal distributions, which results in
                 the existing model could not work very well on this type of
                 data. In this article, taking advantage of air quality data
                 from mobile sensors, we propose an real-time urban air
                 quality estimation method based on the Gaussian Process
                 Regression for air pollution of the unmonitored areas,
                 pivoting on the diffusion effect and the accumulation effect
                 of air pollution. In order to meet the real-time demands, we
                 propose a two-layer ensemble learning framework and a
                 self-adaptivity mechanism to improve computational efficiency
                 and adaptivity. We evaluate our model with real data from
                 mobile sensor system located in Beijing, China. And the
                 experiments show that our proposed model is superior to the
                 state-of-the-art spatial regression methods in both precision
                 and time performances.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {49},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Xue:2019:SAP,
  author =       {Yu Xue and Bing Xue and Mengjie Zhang},
  title =        {Self-Adaptive Particle Swarm Optimization for Large-Scale
                 Feature Selection in Classification},
  journal =      j-TKDD,
  volume =       {13},
  number =       {5},
  pages =        {50:1--50:??},
  month =        oct,
  year =         {2019},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3340848},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Wed Dec 18 14:31:02 MST 2019},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/ft_gateway.cfm?id=3340848},
  abstract =     {Many evolutionary computation (EC) methods have been used to
                 solve feature selection problems and they perform well on
                 most small-scale feature selection problems. However, as the
                 dimensionality of feature selection problems increases, the
                 solution space increases exponentially. Meanwhile, there are
                 more irrelevant features than relevant features in datasets,
                 which leads to many local optima in the huge solution space.
                 Therefore, the existing EC methods still suffer from the
                 problem of stagnation in local optima on large-scale feature
                 selection problems. Furthermore, large-scale feature
                 selection problems with different datasets may have different
                 properties. Thus, it may be of low performance to solve
                 different large-scale feature selection problems with an
                 existing EC method that has only one candidate solution
                 generation strategy (CSGS). In addition, it is time-consuming
                 to find a suitable EC method and corresponding suitable
                 parameter values for a given large-scale feature selection
                 problem if we want to solve it effectively and efficiently.
                 In this article, we propose a self-adaptive particle swarm
                 optimization (SaPSO) algorithm for feature selection,
                 particularly for large-scale feature selection. First, an
                 encoding scheme for the feature selection problem is employed
                 in the SaPSO. Second, three important issues related to
                 self-adaptive algorithms are investigated. After that, the
                 SaPSO algorithm with a typical self-adaptive mechanism is
                 proposed. The experimental results on 12 datasets show that
                 the solution size obtained by the SaPSO algorithm is smaller
                 than its EC counterparts on all datasets. The SaPSO algorithm
                 performs better than its non-EC and EC counterparts in terms
                 of classification accuracy not only on most training sets but
                 also on most test sets. Furthermore, as the dimensionality of
                 the feature selection problem increases, the advantages of
                 SaPSO become more prominent. This highlights that the SaPSO
                 algorithm is suitable for solving feature selection problems,
                 particularly large-scale feature selection problems.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {50},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Crescenzi:2019:HCM,
  author =       {Valter Crescenzi and Paolo Merialdo and Disheng Qiu},
  title =        {Hybrid Crowd-Machine Wrapper Inference},
  journal =      j-TKDD,
  volume =       {13},
  number =       {5},
  pages =        {51:1--51:??},
  month =        oct,
  year =         {2019},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3344720},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Wed Dec 18 14:31:02 MST 2019},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/ft_gateway.cfm?id=3344720},
  abstract =     {Wrapper inference deals in generating programs to extract
                 data from Web pages. Several supervised and unsupervised
                 wrapper inference approaches have been proposed in the
                 literature. On one hand, unsupervised approaches produce
                 erratic wrappers: whenever the sources do not satisfy
                 underlying assumptions of the inference algorithm, their
                 accuracy is compromised. On the other hand, supervised
                 approaches produce accurate wrappers, but since they need
                 training data, their scalability is limited. The recent
                 advent of crowdsourcing platforms has opened new
                 opportunities for supervised approaches, as they make
                 possible the production of large amounts of training data
                 with the support of workers recruited online. Nevertheless,
                 involving human workers has monetary costs. We present an
                 original hybrid crowd-machine wrapper inference system that
                 offers the benefits of both approaches exploiting the
                 cooperation of crowd workers and unsupervised algorithms.
                 Based on a principled probabilistic model that estimates the
                 quality of wrappers, humans workers are recruited only when
                 unsupervised wrapper induction algorithms are not able to
                 produce sufficiently accurate solutions.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {51},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{He:2019:KSA,
  author =       {Kun He and Pan Shi and David Bindel and John E. Hopcroft},
  title =        {{Krylov} Subspace Approximation for Local Community Detection
                 in Large Networks},
  journal =      j-TKDD,
  volume =       {13},
  number =       {5},
  pages =        {52:1--52:??},
  month =        oct,
  year =         {2019},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3340708},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Wed Dec 18 14:31:02 MST 2019},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/ft_gateway.cfm?id=3340708},
  abstract =     {Community detection is an important information mining task
                 to uncover modular structures in large networks. For
                 increasingly common large network datasets, global community
                 detection is prohibitively expensive, and attention has
                 shifted to methods that mine local communities, i.e.,
                 identifying all latent members of a particular community from
                 a few labeled seed members. To address such semi-supervised
                 mining task, we systematically develop a local spectral
                 (LOSP) subspace-based community detection method, called
                 LOSP. We define a family of LOSP subspaces based on Krylov
                 subspaces, and seek a sparse indicator for the target
                 community via an $ l_1 $ norm minimization over the Krylov
                 subspace. Variants of LOSP depend on type of random walks
                 with different diffusion speeds, type of random walks,
                 dimension of the LOSP subspace, and step of diffusions. The
                 effectiveness of the proposed LOSP approach is theoretically
                 analyzed based on Rayleigh quotients, and it is
                 experimentally verified on a wide variety of real-world
                 networks across social, production, and biological domains,
                 as well as on an extensive set of synthetic LFR benchmark
                 datasets.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {52},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

%%% NOTE(review): in the abstract below, two minus signs had been
%%% transcribed as `=': the closeness formula now reads (n - 1) / ...
%%% (the old ``n = 1'' clashed with the following ``n = | V |''), and
%%% the exponent in the hardness bound reads 2 - \epsilon.  Confirm
%%% against the publisher's abstract when next refreshing this entry.
@Article{Bergamini:2019:CTK,
  author =       "Elisabetta Bergamini and Michele Borassi and Pierluigi
                 Crescenzi and Andrea Marino and Henning Meyerhenke",
  title =        "Computing top-$k$ Closeness Centrality Faster in
                 Unweighted Graphs",
  journal =      j-TKDD,
  volume =       "13",
  number =       "5",
  pages =        "53:1--53:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3344719",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:02 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3344719",
  abstract =     "Given a connected graph $ G = (V, E) $, where $V$
                 denotes the set of nodes and $E$ the set of edges of
                 the graph, the length (that is, the number of edges) of
                 the shortest path between two nodes $v$ and $w$ is
                 denoted by $ d(v, w)$. The closeness centrality of a
                 vertex $v$ is then defined as $ (n - 1) / \Sigma_{w \in
                 V} d(v, w)$, where $ n = | V |$. This measure is widely
                 used in the analysis of real-world complex networks,
                 and the problem of selecting the $k$ most central
                 vertices has been deeply analyzed in the last decade.
                 However, this problem is computationally not easy,
                 especially for large networks: in the first part of the
                 article, we prove that it is not solvable in time $ O(|
                 E |^{2 - \epsilon })$ on directed graphs, for any
                 constant $ \epsilon > 0$, under reasonable complexity
                 assumptions. Furthermore, we propose a new algorithm
                 for selecting the $k$ most central nodes in a graph: we
                 experimentally show that this algorithm improves
                 significantly both the textbook algorithm, which is
                 based on computing the distance between all pairs of
                 vertices, and the state of the art. For example, we are
                 able to compute the top $k$ nodes in few dozens of
                 seconds in real-world networks with millions of nodes
                 and edges. Finally, as a case study, we compute the 10
                 most central actors in the Internet Movie Database
                 (IMDB) collaboration network, where two actors are
                 linked if they played together in a movie, and in the
                 Wikipedia citation network, which contains a directed
                 edge from a page $p$ to a page $q$ if $p$ contains a
                 link to $q$.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "53",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Tatti:2019:DFG,
  author =       {Nikolaj Tatti},
  title =        {Density-Friendly Graph Decomposition},
  journal =      j-TKDD,
  volume =       {13},
  number =       {5},
  pages =        {54:1--54:??},
  month =        oct,
  year =         {2019},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3344210},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Wed Dec 18 14:31:02 MST 2019},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/ft_gateway.cfm?id=3344210},
  abstract =     {Decomposing a graph into a hierarchical structure via
                 $k$-core analysis is a standard operation in any modern
                 graph-mining toolkit. $k$-core decomposition is a simple and
                 efficient method that allows to analyze a graph beyond its
                 mere degree distribution. More specifically, it is used to
                 identify areas in the graph of increasing centrality and
                 connectedness, and it allows to reveal the structural
                 organization of the graph. Despite the fact that $k$-core
                 analysis relies on vertex degrees, $k$-cores do not satisfy a
                 certain, rather natural, density property. Simply put, the
                 most central $k$-core is not necessarily the densest
                 subgraph. This inconsistency between $k$-cores and graph
                 density provides the basis of our study. We start by defining
                 what it means for a subgraph to be locally dense, and we show
                 that our definition entails a nested chain decomposition of
                 the graph, similar to the one given by $k$-cores, but in this
                 case the components are arranged in order of increasing
                 density. We show that such a locally dense decomposition for
                 a graph $ G = (V, E)$ can be computed in polynomial time. The
                 running time of the exact decomposition algorithm is
                 $ O(| V |^2 | E |)$ but is significantly faster in practice.
                 In addition, we develop a linear-time algorithm that provides
                 a factor-2 approximation to the optimal locally dense
                 decomposition. Furthermore, we show that the $k$-core
                 decomposition is also a factor-2 approximation, however, as
                 demonstrated by our experimental evaluation, in practice
                 $k$-cores have different structure than locally dense
                 subgraphs, and as predicted by the theory, $k$-cores are not
                 always well-aligned with graph density.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {54},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

%%% Quotation marks in the abstract are written tight (``text'') so
%%% that TeX typesets no stray space inside the quotes.
@Article{Zhu:2019:AAL,
  author =       "Peisong Zhu and Zhuang Chen and Haojie Zheng and
                 Tieyun Qian",
  title =        "Aspect Aware Learning for Aspect Category Sentiment
                 Analysis",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "55:1--55:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3350487",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3350487",
  abstract =     "Aspect category sentiment analysis (ACSA) is an
                 underexploited subtask in aspect level sentiment
                 analysis. It aims to identify the sentiment of
                 predefined aspect categories. The main challenge in
                 ACSA comes from the fact that the aspect category may
                 not occur in the sentence in most of the cases. For
                 example, the review ``they have delicious sandwiches''
                 positively talks about the aspect category ``food'' in
                 an implicit manner. In this article, we propose a
                 novel aspect aware learning (AAL) framework for ACSA
                 tasks. Our key idea is to exploit the interaction
                 between the aspect category and the contents under the
                 guidance of both sentiment polarity and predefined
                 categories. To this end, we design a two-way memory
                 network for integrating AAL into the framework of
                 sentiment classification. We further present two
                 algorithms to incorporate the potential impacts of
                 aspect categories. One is to capture the correlations
                 between aspect terms and the aspect category like
                 ``sandwiches'' and ``food.'' The other is to recognize
                 the aspect category for sentiment representations like
                 ``food'' for ``delicious.'' We conduct extensive
                 experiments on four SemEval datasets. The results
                 reveal the essential role of AAL in ACSA by achieving
                 the state-of-the-art performance.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "55",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2019:UFM,
  author =       {Yuandong Wang and Xuelian Lin and Hua Wei and Tianyu Wo and
                 Zhou Huang and Yong Zhang and Jie Xu},
  title =        {A Unified Framework with Multi-source Data for Predicting
                 Passenger Demands of Ride Services},
  journal =      j-TKDD,
  volume =       {13},
  number =       {6},
  pages =        {56:1--56:??},
  month =        dec,
  year =         {2019},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3355563},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Wed Dec 18 14:31:03 MST 2019},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/ft_gateway.cfm?id=3355563},
  abstract =     {Ride-hailing applications have been offering convenient ride
                 services for people in need. However, such applications still
                 suffer from the issue of supply-demand disequilibrium, which
                 is a typical problem for traditional taxi services. With
                 effective predictions on passenger demands, we can alleviate
                 the disequilibrium by pre-dispatching, dynamic pricing or
                 avoiding dispatching cars to zero-demand areas. Existing
                 studies of demand predictions mainly utilize limited data
                 sources, trajectory data, or orders of ride services or both
                 of them, which also lacks a multi-perspective consideration.
                 In this article, we present a unified framework with a new
                 combined model and a road-network-based spatial partition to
                 leverage multi-source data and model the passenger demands
                 from temporal, spatial, and zero-demand-area perspectives. In
                 addition, our framework realizes offline training and online
                 predicting, which can satisfy the real-time requirement more
                 easily. We analyze and evaluate the performance of our
                 combined model using the actual operational data from UCAR.
                 The experimental results indicate that our model outperforms
                 baselines on both Mean Absolute Error and Root Mean Square
                 Error on average.},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {56},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Liu:2019:CLL,
  author =       "Shenghua Liu and Huawei Shen and Houdong Zheng and
                 Xueqi Cheng and Xiangwen Liao",
  title =        "{CT LIS}: Learning Influences and Susceptibilities
                 through Temporal Behaviors",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "57:1--57:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3363570",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3363570",
  abstract =     "How to quantify influences between users, seeing that
                 social network users influence each other in their
                 temporal behaviors? Previous work has directly defined
                 an independent model parameter to capture the
                 interpersonal influence between each pair of users. To
                 do so, these models need a parameter for each pair of
                 users, which results in high-dimensional models
                 becoming easily trapped into the overfitting problem.
                 However, such models do not consider how influences
                 depend on each other if influences are sent from the
                 same user or if influences are received by the same
                 user. Therefore, we propose a model that defines
                 parameters for every user with a latent influence
                 vector and a susceptibility vector, opposite to define
                 influences on user pairs. Such low-dimensional
                 representations naturally cause the interpersonal
                 influences involving the same user to be coupled with
                 each other, thus reducing the model's complexity.
                 Additionally, the model can easily consider the
                 temporal information and sentimental polarities of
                 users' messages. Finally, we conduct extensive
                 experiments on two real-world Microblog datasets,
                 showing that our model with such representations
                 achieves best performance on three prediction tasks,
                 compared to the state-of-the-art and pair-wise
                 baselines.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "57",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wu:2019:HUI,
  author =       "Jimmy Ming-Tai Wu and Jerry Chun-Wei Lin and Ashish
                 Tamrakar",
  title =        "High-Utility Itemset Mining with Effective Pruning
                 Strategies",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "58:1--58:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3363571",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3363571",
  abstract =     "High-utility itemset mining is a popular data mining
                 problem that considers utility factors, such as
                 quantity and unit profit of items besides frequency
                 measure from the transactional database. It helps to
                 find the most valuable and profitable products/items
                 that are difficult to track by using only the frequent
                 itemsets. An item might have a high-profit value which
                 is rare in the transactional database and has a
                 tremendous importance. While there are many existing
                 algorithms to find high-utility itemsets (HUIs) that
                 generate comparatively large candidate sets, our main
                 focus is on significantly reducing the computation time
                 with the introduction of new pruning strategies. The
                 designed pruning strategies help to reduce the
                 visitation of unnecessary nodes in the search space,
                 which reduces the time required by the algorithm. In
                 this article, two new stricter upper bounds are
                 designed to reduce the computation time by refraining
                 from visiting unnecessary nodes of an itemset. Thus,
                 the search space of the potential HUIs can be greatly
                 reduced, and the mining procedure of the execution time
                 can be improved. The proposed strategies can also
                 significantly minimize the transaction database
                 generated on each node. Experimental results showed
                 that the designed algorithm with two pruning strategies
                 outperform the state-of-the-art algorithms for mining
                 the required HUIs in terms of runtime and number of
                 revised candidates. The memory usage of the designed
                 algorithm also outperforms the state-of-the-art
                 approach. Moreover, a multi-thread concept is also
                 discussed to further handle the problem of big
                 datasets.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "58",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Henzgen:2019:MRD,
  author =       "Sascha Henzgen and Eyke H{\"u}llermeier",
  title =        "Mining Rank Data",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "59:1--59:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3363572",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3363572",
  abstract =     "The problem of frequent pattern mining has been
                 studied quite extensively for various types of data,
                 including sets, sequences, and graphs. Somewhat
                 surprisingly, another important type of data, namely
                 rank data, has received very little attention in data
                 mining so far. In this article, we therefore address
                 the problem of mining rank data, that is, data in the
                 form of rankings (total orders) of an underlying set of
                 items. More specifically, two types of patterns are
                 considered, namely frequent rankings and dependencies
                 between such rankings in the form of association rules.
                 Algorithms for mining frequent rankings and frequent
                 closed rankings are proposed and tested experimentally,
                 using both synthetic and real data.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "59",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Roseberry:2019:MLP,
  author =       "Martha Roseberry and Bartosz Krawczyk and Alberto
                 Cano",
  title =        "Multi-Label Punitive {kNN} with Self-Adjusting Memory
                 for Drifting Data Streams",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "60:1--60:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3363573",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3363573",
  abstract =     "In multi-label learning, data may simultaneously
                 belong to more than one class. When multi-label data
                 arrives as a stream, the challenges associated with
                 multi-label learning are joined by those of data stream
                 mining, including the need for algorithms that are fast
                 and flexible, able to match both the speed and evolving
                 nature of the stream. This article presents a punitive
                 $k$ nearest neighbors algorithm with a self-adjusting
                 memory (MLSAMPkNN) for multi-label, drifting data
                 streams. The memory adjusts in size to contain only the
                 current concept and a novel punitive system identifies
                 and penalizes errant data examples early, removing them
                 from the window. By retaining and using only data that
                 are both current and beneficial, MLSAMPkNN is able to
                 adapt quickly and efficiently to changes within the
                 data stream while still maintaining a low computational
                 complexity. Additionally, the punitive removal
                 mechanism offers increased robustness to various
                 data-level difficulties present in data streams, such
                 as class imbalance and noise. The experimental study
                 compares the proposal to 24 algorithms using 30
                 real-world and 15 artificial multi-label data streams
                 on six multi-label metrics, evaluation time, and memory
                 consumption. The superior performance of the proposed
                 method is validated through non-parametric statistical
                 analysis, proving both high accuracy and low time
                 complexity. MLSAMPkNN is a versatile classifier,
                 capable of returning excellent performance in diverse
                 stream scenarios.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "60",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Lei:2019:IRU,
  author =       "Yu Lei and Wenjie Li",
  title =        "Interactive Recommendation with User-Specific Deep
                 Reinforcement Learning",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "61:1--61:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3359554",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3359554",
  abstract =     "In this article, we study a multi-step interactive
                 recommendation problem for explicit-feedback
                 recommender systems. Different from the existing works,
                 we propose a novel user-specific deep reinforcement
                 learning approach to the problem. Specifically, we
                 first formulate the problem of interactive
                 recommendation for each target user as a Markov
                 decision process (MDP). We then derive a multi-MDP
                 reinforcement learning task for all involved users. To
                 model the possible relationships (including
                 similarities and differences) between different users'
                 MDPs, we construct user-specific latent states by using
                 matrix factorization. After that, we propose a
                 user-specific deep Q-learning (UDQN) method to estimate
                 optimal policies based on the constructed user-specific
                 latent states. Furthermore, we propose Biased UDQN
                 (BUDQN) to explicitly model user-specific information
                 by employing an additional bias parameter when
                 estimating the Q-values for different users. Finally,
                 we validate the effectiveness of our approach by
                 comprehensive experimental results and analysis.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "61",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Lee:2019:AMG,
  author =       "John Boaz Lee and Ryan A. Rossi and Sungchul Kim and
                 Nesreen K. Ahmed and Eunyee Koh",
  title =        "Attention Models in Graphs: a Survey",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "62:1--62:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3363574",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3363574",
  abstract =     "Graph-structured data arise naturally in many
                 different application domains. By representing data as
                 graphs, we can capture entities (i.e., nodes) as well
                 as their relationships (i.e., edges) with each other.
                 Many useful insights can be derived from
                 graph-structured data as demonstrated by an
                 ever-growing body of work focused on graph mining.
                 However, in the real-world, graphs can be both
                 large---with many complex patterns---and noisy, which
                 can pose a problem for effective graph mining. An
                 effective way to deal with this issue is to incorporate
                 ``attention'' into graph mining solutions. An attention
                 mechanism allows a method to focus on task-relevant
                 parts of the graph, helping it to make better
                 decisions. In this work, we conduct a comprehensive and
                 focused survey of the literature on the emerging field
                 of graph attention models. We introduce three intuitive
                 taxonomies to group existing work. These are based on
                 problem setting (type of input and output), the type of
                 attention mechanism used, and the task (e.g., graph
                 classification, link prediction). We motivate our
                 taxonomies through detailed examples and use each to
                 survey competing approaches from a unique standpoint.
                 Finally, we highlight several challenges in the area
                 and discuss promising directions for future work.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "62",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2019:PCM,
  author =       "Wangdong Yang and Kenli Li and Keqin Li",
  title =        "A Pipeline Computing Method of {SpTV} for Three-Order
                 Tensors on {CPU} and {GPU}",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "63:1--63:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3363575",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3363575",
  abstract =     "Tensors have drawn a growing attention in many
                 applications, such as physics, engineering science,
                 social networks, recommended systems. Tensor
                 decomposition is the key to explore the inherent
                 intrinsic data relationship of tensor. There are many
                 sparse tensor and vector multiplications (SpTV) in
                 tensor decomposition. We analyze a variety of storage
                 formats of sparse tensors and develop a piecewise
                 compression strategy to improve the storage efficiency
                 of large sparse tensors. This compression strategy can
                 avoid storing a large number of empty slices and empty
                 fibers in sparse tensors, and thus the storage space is
                 significantly reduced. A parallel algorithm for the
                 SpTV based on the high-order compressed format based on
                 slices is designed to greatly improve its computing
                 performance on graphics processing unit. Each tensor is
                 cut into multiple slices to form a series of sparse
                 matrix and vector multiplications, which form the
                 pipelined parallelism. The transmission time of the
                 slices can be hidden through pipelined parallel to
                 further optimize the performance of the SpTV.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "63",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhou:2019:RMS,
  author =       "Yu Zhou and Jianbin Huang and Heli Sun and Yizhou Sun
                 and Shaojie Qiao and Stephen Wambura",
  title =        "Recurrent Meta-Structure for Robust Similarity Measure
                 in Heterogeneous Information Networks",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "64:1--64:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3364226",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3364226",
  abstract =     "Similarity measure is one of the fundamental task in
                 heterogeneous information network (HIN) analysis. It
                 has been applied to many areas, such as product
                 recommendation, clustering, and Web search. Most of the
                 existing metrics can provide personalized services for
                 users by taking a meta-path or meta-structure as input.
                 However, these metrics may highly depend on the
                 user-specified meta-path or meta-structure. In
                 addition, users must know how to select an appropriate
                 meta-path or meta-structure. In this article, we
                 propose a novel similarity measure in HINs, called
                 Recurrent Meta-Structure (RecurMS)-based Similarity
                 (RMSS). The RecurMS as a schematic structure in HINs
                 provides a unified framework for integrating all of the
                 meta-paths and meta-structures, and can be constructed
                 automatically by means of repetitively traversing the
                 network schema. In order to formalize the semantics,
                 the RecurMS is decomposed into several recurrent
                 meta-paths and recurrent meta-trees, and we then define
                 the commuting matrices of the recurrent meta-paths and
                 meta-trees. All of these commuting matrices are
                 combined together according to different weights. We
                 propose two kinds of weighting strategies to determine
                 the weights. The first is called the local weighting
                 strategy that depends on the sparsity of the commuting
                 matrices, and the second is called the global weighting
                 strategy that depends on the strength of the commuting
                 matrices. As a result, RMSS is defined by means of the
                 weighted summation of the commuting matrices. Note that
                 RMSS can also provide personalized services for users
                 by means of the weights of the recurrent meta-paths and
                 meta-trees. Experimental evaluations show that the
                 proposed RMSS is robust and outperforms the existing
                 metrics in terms of ranking and clustering task.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "64",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Veloso:2019:SSM,
  author =       "Br{\'a}ulio M. Veloso and Renato M. Assun{\c{c}}{\~a}o
                 and Anderson A. Ferreira and Nivio Ziviani",
  title =        "In Search of a Stochastic Model for the E-News
                 Reader",
  journal =      j-TKDD,
  volume =       "13",
  number =       "6",
  pages =        "65:1--65:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362695",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Dec 18 14:31:03 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3362695",
  abstract =     "E-news readers have increasingly at their disposal a
                 broad set of news articles to read. Online newspaper
                 sites use recommender systems to predict and to offer
                 relevant articles to their users. Typically, these
                 recommender systems do not leverage users' reading
                 behavior. If we know how the topics-reads change in a
                 reading session, we may lead to fine-tuned
                 recommendations, for example, after reading a certain
                 number of sports items, it may be counter-productive to
                 keep recommending other sports news. The motivation for
                 this article is the assumption that understanding user
                 behavior when reading successive online news articles
                 can help in developing better recommender systems. We
                 propose five categories of stochastic models to
                 describe this behavior depending on how the previous
                 reading history affects the future choices of topics.
                 We instantiated these five classes with many different
                 stochastic processes covering short-term memory,
                 revealed-preference, cumulative advantage, and
                 geometric sojourn models. Our empirical study is based
                 on large datasets of E-news from two online newspapers.
                 We collected data from more than 13 million users who
                 generated more than 23 million reading sessions, each
                 one composed by the successive clicks of the users on
                 the posted news. We reduce each user session to the
                 sequence of reading news topics. The models were fitted
                 and compared using the Akaike Information Criterion and
                 the Brier Score. We found that the best models are
                 those in which the user moves through topics influenced
                 only by their most recent readings. Our models were
                 also better to predict the next reading than the
                 recommender systems currently used in these journals
                 showing that our models can improve user
                 satisfaction.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "65",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Hasan:2020:NSA,
  author =       "Md Kamrul Hasan and Christopher Pal",
  title =        "A New Smooth Approximation to the Zero One Loss with a
                 Probabilistic Interpretation",
  journal =      j-TKDD,
  volume =       "14",
  number =       "1",
  pages =        "1:1--1:28",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3365672",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Feb 6 07:36:59 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3365672",
  abstract =     "We examine a new form of smooth approximation to the
                 zero one loss in which learning is performed using a
                 reformulation of the widely used logistic function. Our
                 approach is based on using the posterior mean of a
                 novel generalized Beta-Bernoulli \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Mitra:2020:UMV,
  author =       "Sayantan Mitra and Mohammed Hasanuzzaman and Sriparna
                 Saha",
  title =        "A Unified Multi-view Clustering Algorithm Using
                 Multi-objective Optimization Coupled with Generative
                 Model",
  journal =      j-TKDD,
  volume =       "14",
  number =       "1",
  pages =        "2:1--2:31",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3365673",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Feb 6 07:36:59 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3365673",
  abstract =     "There is a large body of works on multi-view
                 clustering that exploit multiple representations (or
                 views) of the same input data for better convergence.
                 These multiple views can come from multiple modalities
                 (image, audio, text) or different feature \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ni:2020:LOC,
  author =       "Li Ni and Wenjian Luo and Wenjie Zhu and Bei Hua",
  title =        "Local Overlapping Community Detection",
  journal =      j-TKDD,
  volume =       "14",
  number =       "1",
  pages =        "3:1--3:25",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3361739",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Feb 6 07:36:59 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3361739",
  abstract =     "Local community detection refers to finding the
                 community that contains the given node based on local
                 information, which becomes very meaningful when global
                 information about the network is unavailable or
                 expensive to acquire. Most studies on local \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Angiulli:2020:CCF,
  author =       "Fabrizio Angiulli",
  title =        "{CFOF}: a Concentration Free Measure for Anomaly
                 Detection",
  journal =      j-TKDD,
  volume =       "14",
  number =       "1",
  pages =        "4:1--4:53",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3362158",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Feb 6 07:36:59 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3362158",
  abstract =     "We present a novel notion of outlier, called the
                 Concentration Free Outlier Factor, or CFOF. As a main
                 contribution, we formalize the notion of concentration
                 of outlier scores and theoretically prove that CFOF
                 does not concentrate in the Euclidean space \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Arifuzzaman:2020:FPA,
  author =       "Shaikh Arifuzzaman and Maleq Khan and Madhav Marathe",
  title =        "Fast Parallel Algorithms for Counting and Listing
                 Triangles in Big Graphs",
  journal =      j-TKDD,
  volume =       "14",
  number =       "1",
  pages =        "5:1--5:34",
  month =        feb,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3365676",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Feb 6 07:36:59 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3365676",
  abstract =     "Big graphs (networks) arising in numerous application
                 areas pose significant challenges for graph analysts as
                 these graphs grow to billions of nodes and edges and
                 are prohibitively large to fit in the main memory.
                 Finding the number of triangles in a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Kuang:2020:TEE,
    author          = {Kun Kuang and Peng Cui and Bo Li and Meng Jiang and
                       Yashen Wang and Fei Wu and Shiqiang Yang},
    title           = {Treatment Effect Estimation via Differentiated
                       Confounder Balancing and Regression},
    journal         = j-TKDD,
    volume          = {14},
    number          = {1},
    pages           = {6:1--6:25},
    month           = feb,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3365677},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Thu Feb 6 07:36:59 MST 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3365677},
    abstract        = {Treatment effect plays an important role on decision
                       making in many fields, such as social marketing,
                       healthcare, and public policy. The key challenge on
                       estimating treatment effect in the wild observational
                       studies is to handle confounding bias induced
                       \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {6},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Jan:2020:ECC,
    author          = {Zohaib Md. Jan and Brijesh Verma},
    title           = {Evolutionary Classifier and Cluster Selection Approach
                       for Ensemble Classification},
    journal         = j-TKDD,
    volume          = {14},
    number          = {1},
    pages           = {7:1--7:18},
    month           = feb,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3366633},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Thu Feb 6 07:36:59 MST 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3366633},
    abstract        = {Ensemble classifiers improve the classification
                       performance by combining several classifiers using a
                       suitable fusion methodology. Many ensemble classifier
                       generation methods have been developed that allowed the
                       training of multiple classifiers on a \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {7},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Queiroz-Sousa:2020:ROT,
    author          = {Paulo Orlando Queiroz-Sousa and Ana Carolina Salgado},
    title           = {A Review on {OLAP} Technologies Applied to Information
                       Networks},
    journal         = j-TKDD,
    volume          = {14},
    number          = {1},
    pages           = {8:1--8:25},
    month           = feb,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3370912},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Thu Feb 6 07:36:59 MST 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3370912},
    abstract        = {Many real systems produce network data or highly
                       interconnected data, which can be called information
                       networks. These information networks form a critical
                       component in modern information infrastructure,
                       constituting a large graph data volume. The \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {8},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Nie:2020:ALL,
    author          = {Feiping Nie and Zheng Wang and Rong Wang and Zhen Wang
                       and Xuelong Li},
    title           = {Adaptive Local Linear Discriminant Analysis},
    journal         = j-TKDD,
    volume          = {14},
    number          = {1},
    pages           = {9:1--9:19},
    month           = feb,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3369870},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Thu Feb 6 07:36:59 MST 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3369870},
    abstract        = {Dimensionality reduction plays a significant role in
                       high-dimensional data processing, and Linear
                       Discriminant Analysis (LDA) is a widely used supervised
                       dimensionality reduction approach. However, a major
                       drawback of LDA is that it is incapable of \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {9},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Lu:2020:ILS,
    author          = {Xinjiang Lu and Zhiwen Yu and Chuanren Liu and Yanchi
                       Liu and Hui Xiong and Bin Guo},
    title           = {Inferring Lifetime Status of Point-of-Interest: a
                       Multitask Multiclass Approach},
    journal         = j-TKDD,
    volume          = {14},
    number          = {1},
    pages           = {10:1--10:27},
    month           = feb,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3369799},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Thu Feb 6 07:36:59 MST 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3369799},
    abstract        = {A Point-of-Interest (POI) refers to a specific
                       location that people may find useful or interesting. In
                       modern cities, a large number of POIs emerge, grow,
                       stabilize for a period, then finally disappear. The
                       stages (e.g., emerge and grow) in this process
                       \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {10},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Galimberti:2020:CDM,
    author          = {Edoardo Galimberti and Francesco Bonchi and Francesco
                       Gullo and Tommaso Lanciano},
    title           = {Core Decomposition in Multilayer Networks: Theory,
                       Algorithms, and Applications},
    journal         = j-TKDD,
    volume          = {14},
    number          = {1},
    pages           = {11:1--11:40},
    month           = feb,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3369872},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Thu Feb 6 07:36:59 MST 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3369872},
    abstract        = {Multilayer networks are a powerful paradigm to model
                       complex systems, where multiple relations occur between
                       the same entities. Despite the keen interest in a
                       variety of tasks, algorithms, and analyses in this type
                       of network, the problem of extracting \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {11},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Shin:2020:FAP,
    author          = {Kijung Shin and Sejoon Oh and Jisu Kim and Bryan Hooi
                       and Christos Faloutsos},
    title           = {Fast, Accurate and Provable Triangle Counting in Fully
                       Dynamic Graph Streams},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {12:1--12:39},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3375392},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3375392},
    abstract        = {Given a stream of edge additions and deletions, how
                       can we estimate the count of triangles in it? If we can
                       store only a subset of the edges, how can we obtain
                       unbiased estimates with small variances? Counting
                       triangles (i.e., cliques of size three) in \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {12},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Han:2020:GLS,
    author          = {Huimei Han and Xingquan Zhu and Ying Li},
    title           = {Generalizing Long Short-Term Memory Network for Deep
                       Learning from Generic Data},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {13:1--13:28},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3366022},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3366022},
    abstract        = {Long Short-Term Memory (LSTM) network, a popular
                       deep-learning model, is particularly useful for data
                       with temporal correlation, such as texts, sequences, or
                       time series data, thanks to its well-sought after
                       recurrent network structures designed to \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {13},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Lin:2020:BCR,
    author          = {Chi-Chun Lin and Kun-Ta Chuang and Wush Chi-Hsuan Wu
                       and Ming-Syan Chen},
    title           = {Budget-Constrained Real-Time Bidding Optimization:
                       Multiple Predictors Make It Better},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {14:1--14:27},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3375393},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3375393},
    abstract        = {In this article, we pursue a better solution for the
                       promising problem, i.e., the bidding strategy design,
                       in the real-time bidding (RTB) advertising (AD)
                       environment. Under the budget constraint, the design of
                       an optimal strategy for bidding on each \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {14},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Yan:2020:MTI,
    author          = {Xiaoqiang Yan and Zhengzheng Lou and Shizhe Hu and
                       Yangdong Ye},
    title           = {Multi-task Information Bottleneck Co-clustering for
                       Unsupervised Cross-view Human Action Categorization},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {15:1--15:23},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3375394},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3375394},
    abstract        = {The widespread adoption of low-cost cameras generates
                       massive amounts of videos recorded from different
                       viewpoints every day. To cope with this vast amount of
                       unlabeled and heterogeneous data, a new multi-task
                       information bottleneck co-clustering (MIBC) \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {15},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Lin:2020:BRP,
    author          = {Bo Lin and Wei Luo and Zhiling Luo and Bo Wang and
                       Shuiguang Deng and Jianwei Yin and Mengchu Zhou},
    title           = {Bradykinesia Recognition in {Parkinson}'s Disease via
                       Single {RGB} Video},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {16:1--16:19},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3369438},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3369438},
    abstract        = {Parkinson's disease is a progressive nervous system
                       disorder afflicting millions of patients. Among its
                       motor symptoms, bradykinesia is one of the cardinal
                       manifestations. Experienced doctors are required for
                       the clinical diagnosis of bradykinesia, but \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {16},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Liu:2020:RTT,
    author          = {Shuai Liu and Guojie Song and Wenhao Huang},
    title           = {Real-time Transportation Prediction Correction using
                       Reconstruction Error in Deep Learning},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {17:1--17:20},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3369871},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3369871},
    abstract        = {In online complex systems such as transportation
                       system, an important work is real-time traffic
                       prediction. Due to the data shift, data model
                       inconsistency, and sudden change of traffic patterns
                       (like transportation accident), the prediction result
                       \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {17},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Dai:2020:CVE,
    author          = {Chenglong Dai and Dechang Pi and Stefanie I. Becker
                       and Jia Wu and Lin Cui and Blake Johnson},
    title           = {{CenEEGs}: Valid {EEG} Selection for Classification},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {18:1--18:25},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3371153},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3371153},
    abstract        = {This article explores valid brain
                       electroencephalography (EEG) selection for EEG
                       classification with different classifiers, which has
                       been rarely addressed in previous studies and is mostly
                       ignored by existing EEG processing methods and
                       applications. \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {18},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Dong:2020:RCP,
    author          = {Jialin Dong and Kai Yang and Yuanming Shi},
    title           = {Ranking from Crowdsourced Pairwise Comparisons via
                       Smoothed {Riemannian} Optimization},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {19:1--19:26},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3372407},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3372407},
    abstract        = {Social Internet of Things has recently become a
                       promising paradigm for augmenting the capability of
                       humans and devices connected in the networks to provide
                       services. In social Internet of Things network,
                       crowdsourcing that collects the intelligence of
                       \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {19},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

%%% NOTE: the leading name in the title was stored as "AR 2 Net", which
%%% looks like a flattened superscript; it is restored here with a math
%%% superscript 2.  Verify against the publisher's record for the DOI.
@Article{Xu:2020:ANA,
  author =       "Yanan Xu and Yanyan Shen and Yanmin Zhu and Jiadi Yu",
  title =        "{AR$^2$Net}: an Attentive Neural Approach for Business
                 Location Selection with Satellite Data and Urban Data",
  journal =      j-TKDD,
  volume =       "14",
  number =       "2",
  pages =        "20:1--20:28",
  month =        mar,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3372406",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Mar 10 08:50:37 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3372406",
  abstract =     "Business location selection is crucial to the success
                 of businesses. Traditional approaches like manual
                 survey investigate multiple factors, such as foot
                 traffic, neighborhood structure, and available
                 workforce, which are typically hard to measure. In
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Concas:2020:MSM,
    author          = {Francesco Concas and Pengfei Xu and Mohammad A. Hoque
                       and Jiaheng Lu and Sasu Tarkoma},
    title           = {Multiple Set Matching with {Bloom} Matrix and {Bloom}
                       Vector},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {21:1--21:21},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3372409},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3372409},
    abstract        = {Bloom Filter is a space-efficient probabilistic data
                       structure for checking the membership of elements in a
                       set. Given multiple sets, a standard Bloom Filter is
                       not sufficient when looking for the items to which an
                       element or a set of input elements \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {21},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Li:2020:CDM,
    author          = {Pei-Zhen Li and Ling Huang and Chang-Dong Wang and
                       Jian-Huang Lai and Dong Huang},
    title           = {Community Detection by Motif-Aware Label Propagation},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {22:1--22:19},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3378537},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3378537},
    abstract        = {Community detection (or graph clustering) is crucial
                       for unraveling the structural properties of complex
                       networks. As an important technique in community
                       detection, label propagation has shown the advantage of
                       finding a good community structure with \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {22},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Guo:2020:NAF,
    author          = {Yuan Guo and Yu Sun and Kai Wu and Kerong Jiang},
    title           = {New Algorithms of Feature Selection and Big Data
                       Assignment for {CBR} System Integrated by {Bayesian}
                       Network},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {23:1--23:20},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3373086},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3373086},
    abstract        = {Under big data, the integrated system of case-based
                       reasoning and Bayesian network has exhibited great
                       advantage in implementing the intelligence of
                       engineering application in many domains. To further
                       improve the performance of the hybrid system, this
                       \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {23},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Hua:2020:PTM,
    author          = {Ting Hua and Chang-Tien Lu and Jaegul Choo and Chandan
                       K. Reddy},
    title           = {Probabilistic Topic Modeling for Comparative Analysis
                       of Document Collections},
    journal         = j-TKDD,
    volume          = {14},
    number          = {2},
    pages           = {24:1--24:27},
    month           = mar,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3369873},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue Mar 10 08:50:37 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3369873},
    abstract        = {Probabilistic topic models, which can discover hidden
                       patterns in documents, have been extensively studied.
                       However, rather than learning from a single document
                       collection, numerous real-world applications demand a
                       comprehensive understanding of the \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {24},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Lee:2020:LDS,
    author          = {Kwang Hee Lee and Myoung Ho Kim},
    title           = {Linearization of Dependency and Sampling for
                       Participation-based Betweenness Centrality in Very
                       Large {$B$}-hypergraphs},
    journal         = j-TKDD,
    volume          = {14},
    number          = {3},
    pages           = {25:1--25:41},
    month           = may,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3375399},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue May 19 09:32:05 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3375399},
    abstract        = {A B-hypergraph consisting of nodes and directed
                       hyperedges is a generalization of the directed graph. A
                       directed hyperedge in the B-hypergraph represents a
                       relation from a set of source nodes to a single
                       destination node. We suggest one possible \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {25},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Bian:2020:MSM,
    author          = {Jiang Bian and Haoyi Xiong and Yanjie Fu and Jun Huan
                       and Zhishan Guo},
    title           = {{MP$^2$SDA}: Multi-Party Parallelized Sparse
                       Discriminant Learning},
    journal         = j-TKDD,
    volume          = {14},
    number          = {3},
    pages           = {26:1--26:22},
    month           = may,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3374919},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue May 19 09:32:05 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3374919},
    abstract        = {Sparse Discriminant Analysis (SDA) has been widely
                       used to improve the performance of classical Fisher's
                       Linear Discriminant Analysis in supervised metric
                       learning, feature selection, and classification. With
                       the increasing needs of distributed data \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {26},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Tang:2020:ERF,
    author          = {Lei Tang and Zihang Liu and Yaling Zhao and Zongtao
                       Duan and Jingchi Jia},
    title           = {Efficient Ridesharing Framework for Ride-matching via
                       Heterogeneous Network Embedding},
    journal         = j-TKDD,
    volume          = {14},
    number          = {3},
    pages           = {27:1--27:24},
    month           = may,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3373839},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue May 19 09:32:05 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3373839},
    abstract        = {Ridesharing has attracted increasing attention in
                       recent years, and combines the flexibility and speed of
                       private cars with the reduced cost of fixed-line
                       systems to benefit alleviating traffic pressure. A
                       major issue in ridesharing is the accurate \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {27},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

%%% NOTE: the second author's surname was stored as "CemgIl", with a stray
%%% mid-word capital I (probably a mangled Turkish dotted capital I); it
%%% is normalized here to "Cemgil".  Verify against the publisher's record.
@Article{Ermis:2020:DSD,
  author =       "Beyza Ermis and A. Taylan Cemgil",
  title =        "Data Sharing via Differentially Private Coupled Matrix
                 Factorization",
  journal =      j-TKDD,
  volume =       "14",
  number =       "3",
  pages =        "28:1--28:27",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3372408",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue May 19 09:32:05 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3372408",
  abstract =     "We address the privacy-preserving data-sharing problem
                 in a distributed multiparty setting. In this setting,
                 each data site owns a distinct part of a dataset and
                 the aim is to estimate the parameters of a statistical
                 model conditioned on the complete \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Yang:2020:CIM,
    author          = {Yu Yang and Xiangbo Mao and Jian Pei and Xiaofei He},
    title           = {Continuous Influence Maximization},
    journal         = j-TKDD,
    volume          = {14},
    number          = {3},
    pages           = {29:1--29:38},
    month           = may,
    year            = {2020},
    coden           = {????},
    doi             = {https://doi.org/10.1145/3380928},
    issn            = {1556-4681 (print), 1556-472X (electronic)},
    issn-l          = {1556-4681},
    bibdate         = {Tue May 19 09:32:05 MDT 2020},
    bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
    url             = {https://dl.acm.org/doi/abs/10.1145/3380928},
    abstract        = {Imagine we are introducing a new product through a
                       social network, where we know for each user in the
                       network the function of purchase probability with
                       respect to discount. Then, what discounts should we
                       offer to those social network users so that, \ldots{}},
    acknowledgement = ack-nhfb,
    ajournal        = {ACM Trans. Knowl. Discov. Data},
    articleno       = {29},
    fjournal        = {ACM Transactions on Knowledge Discovery from Data
                       (TKDD)},
    journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Ostovar:2020:RDC,
  author          = {Alireza Ostovar and
                     Sander J. J. Leemans and
                     Marcello {La Rosa}},
  title           = {Robust Drift Characterization from Event Streams of
                     Business Processes},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {3},
  articleno       = {30},
  pages           = {30:1--30:57},
  month           = may,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3375398},
  url             = {https://dl.acm.org/doi/abs/10.1145/3375398},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Process workers may vary the normal execution of a
                     business process to adjust to changes in their
                     operational environment, e.g., changes in workload,
                     season, or regulations. Changes may be simple, such as
                     skipping an individual activity, or complex, \ldots{}},
  bibdate         = {Tue May 19 09:32:05 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Liu:2020:SFE,
  author          = {Bang Liu and Fred X. Han and Di Niu and
                     Linglong Kong and Kunfeng Lai and Yu Xu},
  title           = {{Story Forest}: Extracting Events and Telling Stories
                     from Breaking News},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {3},
  articleno       = {31},
  pages           = {31:1--31:28},
  month           = may,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3377939},
  url             = {https://dl.acm.org/doi/abs/10.1145/3377939},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Extracting events accurately from vast news corpora
                     and organize events logically is critical for news apps
                     and search engines, which aim to organize news
                     information collected from the Internet and present it
                     to users in the most sensible forms. \ldots{}},
  bibdate         = {Tue May 19 09:32:05 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Akhtar:2020:DMT,
  author          = {Md Shad Akhtar and
                     Dushyant Singh Chauhan and
                     Asif Ekbal},
  title           = {A Deep Multi-task Contextual Attention Framework for
                     Multi-modal Affect Analysis},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {3},
  articleno       = {32},
  pages           = {32:1--32:27},
  month           = may,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3380744},
  url             = {https://dl.acm.org/doi/abs/10.1145/3380744},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Multi-modal affect analysis (e.g., sentiment and
                     emotion analysis) is an interdisciplinary study and has
                     been an emerging and prominent field in Natural
                     Language Processing and Computer Vision. The effective
                     fusion of multiple modalities (e.g., text, \ldots{}).},
  bibdate         = {Tue May 19 09:32:05 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Abd-Elaziz:2020:EDM,
  author          = {M. M. Abd-Elaziz and Hazem M. El-Bakry and
                     Ahmed Abou Elfetouh and Amira Elzeiny},
  title           = {Enhanced Data Mining Technique to Measure Satisfaction
                     Degree of Social Media Users of {Xeljanz} Drug},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {3},
  articleno       = {33},
  pages           = {33:1--33:13},
  month           = may,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3389433},
  url             = {https://dl.acm.org/doi/abs/10.1145/3389433},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In the recent times, social media has become important
                     in the field of health care as a major resource of
                     valuable health information. Social media can provide
                     massive amounts of data in real-time through user
                     interaction, and this data can be analysed \ldots{}},
  bibdate         = {Tue May 19 09:32:05 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% NOTE: The first author is entered in ``Given Family'' order (Tuomo
%%% Alasalmi) so that BibTeX takes Alasalmi, not Tuomo, as the surname
%%% for sorting and labels.  The citation label keeps its Tuomo tag so
%%% that existing \cite commands continue to resolve.
@Article{Tuomo:2020:BCC,
  author =       "Tuomo Alasalmi and Jaakko Suutala and Juha R{\"o}ning
                 and Heli Koskim{\"a}ki",
  title =        "Better Classifier Calibration for Small Datasets",
  journal =      j-TKDD,
  volume =       "14",
  number =       "3",
  pages =        "34:1--34:19",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385656",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue May 19 09:32:05 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3385656",
  abstract =     "Classifier calibration does not always go hand in hand
                 with the classifier's ability to separate the classes.
                 There are applications where good classifier
                 calibration, i.e., the ability to produce accurate
                 probability estimates, is more important than
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "34",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% NOTE(review): The truncated abstract used to end in a dangling
%%% ``Coordination S.'', which reads as a sentence end.  The cut-off word
%%% is spelled out below; it is presumably the start of ``Coordination
%%% Strategy Inference Problem'' --- confirm against the publisher record.
@Article{Amornbunchornvej:2020:FIF,
  author =       "Chainarong Amornbunchornvej and Tanya Berger-Wolf",
  title =        "Framework for Inferring Following Strategies from Time
                 Series of Movement Data",
  journal =      j-TKDD,
  volume =       "14",
  number =       "3",
  pages =        "35:1--35:22",
  month =        may,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385730",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue May 19 09:32:05 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3385730",
  abstract =     "How do groups of individuals achieve consensus in
                 movement decisions? Do individuals follow their
                 friends, the one predetermined leader, or whomever just
                 happens to be nearby? To address these questions
                 computationally, we formalize Coordination Strategy
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "35",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Sun:2020:NEC,
  author          = {Heli Sun and Fang He and Jianbin Huang and
                     Yizhou Sun and Yang Li and Chenyu Wang and
                     Liang He and Zhongbin Sun and Xiaolin Jia},
  title           = {Network Embedding for Community Detection in
                     Attributed Networks},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {3},
  articleno       = {36},
  pages           = {36:1--36:25},
  month           = may,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3385415},
  url             = {https://dl.acm.org/doi/abs/10.1145/3385415},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Community detection aims to partition network nodes
                     into a set of clusters, such that nodes are more
                     densely connected to each other within the same cluster
                     than other clusters. For attributed networks, apart
                     from the denseness requirement of topology \ldots{}},
  bibdate         = {Tue May 19 09:32:05 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Lappas:2020:MCP,
  author          = {Theodoros Lappas},
  title           = {Mining Career Paths from Large Resume Databases:
                     Evidence from {IT} Professionals},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {3},
  articleno       = {37},
  pages           = {37:1--37:38},
  month           = may,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3379984},
  url             = {https://dl.acm.org/doi/abs/10.1145/3379984},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {The emergence of online professional platforms, such
                     as LinkedIn and Indeed, has led to unprecedented
                     volumes of rich resume data that have revolutionized
                     the study of careers. One of the most prevalent
                     problems in this space is the extraction of \ldots{}},
  bibdate         = {Tue May 19 09:32:05 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhang:2020:INA,
  author          = {Si Zhang and Hanghang Tong and Jie Tang and
                     Jiejun Xu and Wei Fan},
  title           = {Incomplete Network Alignment: Problem Definitions and
                     Fast Solutions},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {38},
  pages           = {38:1--38:26},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3384203},
  url             = {https://dl.acm.org/doi/abs/10.1145/3384203},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Networks are prevalent in many areas and are often
                     collected from multiple sources. However, due to the
                     veracity characteristics, more often than not, networks
                     are incomplete. Network alignment and network
                     completion have become two fundamental \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Sun:2020:FDA,
  author          = {Bintao Sun and T.-H. Hubert Chan and Mauro Sozio},
  title           = {Fully Dynamic Approximate $k$-Core Decomposition in
                     Hypergraphs},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {39},
  pages           = {39:1--39:21},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3385416},
  url             = {https://dl.acm.org/doi/abs/10.1145/3385416},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In this article, we design algorithms to maintain
                     approximate core values in dynamic hypergraphs. This
                     notion has been well studied for normal graphs in both
                     static and dynamic setting. We generalize the problem
                     to hypergraphs when edges can be inserted \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Balasubramaniam:2020:ENT,
  author          = {Thirunavukarasu Balasubramaniam and
                     Richi Nayak and
                     Chau Yuen},
  title           = {Efficient Nonnegative Tensor Factorization via
                     Saturating Coordinate Descent},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {40},
  pages           = {40:1--40:28},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3385654},
  url             = {https://dl.acm.org/doi/abs/10.1145/3385654},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {With the advancements in computing technology and
                     web-based applications, data are increasingly generated
                     in multi-dimensional form. These data are usually
                     sparse due to the presence of a large number of users
                     and fewer user interactions. To deal with \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Kong:2020:GSS,
  author          = {Xiangjie Kong and Jun Zhang and Da Zhang and
                     Yi Bu and Ying Ding and Feng Xia},
  title           = {The Gene of Scientific Success},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {41},
  pages           = {41:1--41:19},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3385530},
  url             = {https://dl.acm.org/doi/abs/10.1145/3385530},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {This article elaborates how to identify and evaluate
                     causal factors to improve scientific impact. Currently,
                     analyzing scientific impact can be beneficial to
                     various academic activities including funding
                     application, mentor recommendation, discovering
                     \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Chen:2020:CTF,
  author          = {Cen Chen and Kenli Li and Sin G. Teo and
                     Xiaofeng Zou and Keqin Li and Zeng Zeng},
  title           = {Citywide Traffic Flow Prediction Based on Multiple
                     Gated Spatio-temporal Convolutional Neural Networks},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {42},
  pages           = {42:1--42:23},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3385414},
  url             = {https://dl.acm.org/doi/abs/10.1145/3385414},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Traffic flow prediction is crucial for public safety
                     and traffic management, and remains a big challenge
                     because of many complicated factors, e.g., multiple
                     spatio-temporal dependencies, holidays, and weather.
                     Some work leveraged 2D convolutional neural \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhu:2020:SCM,
  author          = {Tianyu Zhu and Guannan Liu and Guoqing Chen},
  title           = {Social Collaborative Mutual Learning for Item
                     Recommendation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {43},
  pages           = {43:1--43:19},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3387162},
  url             = {https://dl.acm.org/doi/abs/10.1145/3387162},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Recommender Systems (RSs) provide users with item
                     choices based on their preferences reflected in past
                     interactions and become important tools to alleviate
                     the information overload problem for users. However, in
                     real-world scenarios, the user-item \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% NOTE: {Saiyan} is brace-protected as the name of the algorithm, so
%%% that bibliography styles that downcase titles leave it capitalized,
%%% in the same way as {Bayesian} in this title.
@Article{Constantinou:2020:LBN,
  author =       "Anthony C. Constantinou",
  title =        "Learning {Bayesian} Networks with the {Saiyan}
                 Algorithm",
  journal =      j-TKDD,
  volume =       "14",
  number =       "4",
  pages =        "44:1--44:21",
  month =        jul,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385655",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Jul 10 13:39:39 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3385655",
  abstract =     "Some structure learning algorithms have proven to be
                 effective in reconstructing hypothetical Bayesian
                 Network graphs from synthetic data. However, in their
                 mission to maximise a scoring function, many become
                 conservative and minimise edges discovered. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Wang:2020:EEB,
  author          = {Changping Wang and Chaokun Wang and Zheng Wang and
                     Xiaojun Ye and Philip S. Yu},
  title           = {{Edge2vec}: Edge-based Social Network Embedding},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {45},
  pages           = {45:1--45:24},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3391298},
  url             = {https://dl.acm.org/doi/abs/10.1145/3391298},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Graph embedding, also known as network embedding and
                     network representation learning, is a useful technique
                     which helps researchers analyze information networks
                     through embedding a network into a low-dimensional
                     space. However, existing graph embedding \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhu:2020:SGC,
  author          = {Xiaofeng Zhu and Shichao Zhang and Jilian Zhang and
                     Yonggang Li and Guangquan Lu and Yang Yang},
  title           = {Sparse Graph Connectivity for Image Segmentation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {46},
  pages           = {46:1--46:19},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3397188},
  url             = {https://dl.acm.org/doi/abs/10.1145/3397188},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {It has been demonstrated that the segmentation
                     performance is highly dependent on both subspace
                     preservation and graph connectivity. In the literature,
                     the full connectivity method linearly represents each
                     data point (e.g., a pixel in one image) by all
                     \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Marques:2020:IEU,
  author          = {Henrique O. Marques and
                     Ricardo J. G. B. Campello and
                     J{\"u}rg Sander and
                     Arthur Zimek},
  title           = {Internal Evaluation of Unsupervised Outlier
                     Detection},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {47},
  pages           = {47:1--47:42},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3394053},
  url             = {https://dl.acm.org/doi/abs/10.1145/3394053},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Although there is a large and growing literature that
                     tackles the unsupervised outlier detection problem, the
                     unsupervised evaluation of outlier detection results is
                     still virtually untouched in the literature. The
                     so-called internal evaluation, based \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhu:2020:SWM,
  author          = {Xiaofeng Zhu and Shichao Zhang and Yonghua Zhu and
                     Wei Zheng and Yang Yang},
  title           = {Self-weighted Multi-view Fuzzy Clustering},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {48},
  pages           = {48:1--48:17},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3396238},
  url             = {https://dl.acm.org/doi/abs/10.1145/3396238},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Since the data in each view may contain distinct
                     information different from other views as well as has
                     common information for all views in multi-view
                     learning, many multi-view clustering methods have been
                     designed to use these information (including the
                     \ldots{}).},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Das:2020:DAI,
  author          = {Shubhomoy Das and Weng-Keen Wong and
                     Thomas Dietterich and Alan Fern and
                     Andrew Emmott},
  title           = {Discovering Anomalies by Incorporating Feedback from
                     an Expert},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {49},
  pages           = {49:1--49:32},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3396608},
  url             = {https://dl.acm.org/doi/abs/10.1145/3396608},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Unsupervised anomaly detection algorithms search for
                     outliers and then predict that these outliers are the
                     anomalies. When deployed, however, these algorithms are
                     often criticized for high false-positive and high
                     false-negative rates. One main cause of \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Xu:2020:NSR,
  author          = {Yuanbo Xu and Yongjian Yang and En Wang and
                     Jiayu Han and Fuzhen Zhuang and Zhiwen Yu and
                     Hui Xiong},
  title           = {Neural Serendipity Recommendation: Exploring the
                     Balance between Accuracy and Novelty with Sparse
                     Explicit Feedback},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {4},
  articleno       = {50},
  pages           = {50:1--50:25},
  month           = jul,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3396607},
  url             = {https://dl.acm.org/doi/abs/10.1145/3396607},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Recommender systems have been playing an important
                     role in providing personalized information to users.
                     However, there is always a trade-off between accuracy
                     and novelty in recommender systems. Usually, many users
                     are suffering from redundant or \ldots{}},
  bibdate         = {Fri Jul 10 13:39:39 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Xiong:2020:ISI,
  author          = {Hui Xiong and Chih-Jen Lin},
  title           = {Introduction to the Special Issue on the Best Papers
                     from {KDD 2018}},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {5},
  articleno       = {51e},
  pages           = {51e:1--51e:2},
  month           = aug,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3407901},
  url             = {https://dl.acm.org/doi/10.1145/3407901},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhang:2020:TOO,
  author          = {Ping Zhang and Zhifeng Bao and Yuchen Li and
                     Guoliang Li and Yipeng Zhang and Zhiyong Peng},
  title           = {Towards an Optimal Outdoor Advertising Placement: When
                     a Budget Constraint Meets Moving Trajectories},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {5},
  articleno       = {51},
  pages           = {51:1--51:32},
  month           = aug,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3350488},
  url             = {https://dl.acm.org/doi/10.1145/3350488},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In this article, we propose and study the problem of
                     trajectory-driven influential billboard placement:
                     given a set of billboards U (each with a location and a
                     cost), a database of trajectories T, and a budget L, we
                     find a set of billboards within the \ldots{}},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Xiao:2020:MUM,
  author          = {Keli Xiao and Zeyang Ye and Lihao Zhang and
                     Wenjun Zhou and Yong Ge and Yuefan Deng},
  title           = {Multi-User Mobile Sequential Recommendation for Route
                     Optimization},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {14},
  number          = {5},
  articleno       = {52},
  pages           = {52:1--52:28},
  month           = aug,
  year            = {2020},
  doi             = {https://doi.org/10.1145/3360048},
  url             = {https://dl.acm.org/doi/10.1145/3360048},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {We enhance the mobile sequential recommendation (MSR)
                     model and address some critical issues in existing
                     formulations by proposing three new forms of the MSR
                     from a multi-user perspective. The multi-user MSR
                     (MMSR) model searches optimal routes for \ldots{}},
  bibdate         = {Fri Aug 28 11:59:01 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Huai:2020:LDM,
  author =       "Mengdi Huai and Chenglin Miao and Yaliang Li and
                 Qiuling Suo and Lu Su and Aidong Zhang",
  title =        "Learning Distance Metrics from Probabilistic
                 Information",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "53:1--53:33",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3364320",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3364320",
  abstract =     "The goal of metric learning is to learn a good
                 distance metric that can capture the relationships
                 among instances, and its importance has long been
                 recognized in many fields. An implicit assumption in
                 the traditional settings of metric learning is that
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "53",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhu:2020:PMG,
  author =       "Hongyuan Zhu and Qi Liu and Nicholas Jing Yuan and Kun
                 Zhang and Guang Zhou and Enhong Chen",
  title =        "Pop Music Generation: From Melody to Multi-style
                 Arrangement",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "54:1--54:31",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3374915",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3374915",
  abstract =     "Music plays an important role in our daily life. With
                 the development of deep learning and modern generation
                 techniques, researchers have done plenty of works on
                 automatic music generation. However, due to the special
                 requirements of both melody and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "54",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Mautz:2020:NRS,
  author =       "Dominik Mautz and Wei Ye and Claudia Plant and
                 Christian B{\"o}hm",
  title =        "Non-Redundant Subspace Clusterings with {Nr-Kmeans}
                 and {Nr-DipMeans}",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "55:1--55:24",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385652",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3385652",
  abstract =     "A huge object collection in high-dimensional space can
                 often be clustered in more than one way, for instance,
                 objects could be clustered by their shape or
                 alternatively by their color. Each grouping represents
                 a different view of the dataset. The new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "55",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Riondato:2020:MMI,
  author =       "Matteo Riondato and Fabio Vandin",
  title =        "{MiSoSouP}: Mining Interesting Subgroups with Sampling
                 and Pseudodimension",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "56:1--56:31",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3385653",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3385653",
  abstract =     "We present MiSoSouP, a suite of algorithms for
                 extracting high-quality approximations of the most
                 interesting subgroups, according to different popular
                 interestingness measures, from a random sample of a
                 transactional dataset. We describe a new \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "56",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zugner:2020:AAG,
  author =       "Daniel Z{\"u}gner and Oliver Borchert and Amir
                 Akbarnejad and Stephan G{\"u}nnemann",
  title =        "Adversarial Attacks on Graph Neural Networks:
                 Perturbations and their Patterns",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "57:1--57:31",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3394520",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3394520",
  abstract =     "Deep learning models for graphs have achieved strong
                 performance for the task of node classification.
                 Despite their proliferation, little is known about
                 their robustness to adversarial attacks. Yet, in
                 domains where they are likely to be used, e.g., the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "57",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhou:2020:EAK,
  author =       "Xu Zhou and Kenli Li and Zhibang Yang and Yunjun Gao
                 and Keqin Li",
  title =        "Efficient Approaches to $k$ Representative {G-Skyline}
                 Queries",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "58:1--58:27",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3397503",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3397503",
  abstract =     "The G-Skyline (GSky) query is a powerful tool to
                 analyze optimal groups in decision support. Compared
                 with other group skyline queries, it releases users
                 from providing an aggregate function. Besides, it can
                 get much comprehensive results without \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "58",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhao:2020:UFS,
  author =       "Peilin Zhao and Dayong Wang and Pengcheng Wu and
                 Steven C. H. Hoi",
  title =        "A Unified Framework for Sparse Online Learning",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "59:1--59:20",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3361559",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3361559",
  abstract =     "The amount of data in our society has been exploding
                 in the era of big data. This article aims to address
                 several open challenges in big data stream
                 classification. Many existing studies in data mining
                 literature follow the batch learning setting, which
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "59",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ceccarello:2020:GCB,
  author =       "Matteo Ceccarello and Andrea Pietracaprina and Geppino
                 Pucci",
  title =        "A General Coreset-Based Approach to Diversity
                 Maximization under Matroid Constraints",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "60:1--60:27",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3402448",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3402448",
  abstract =     "Diversity maximization is a fundamental problem in web
                 search and data mining. For a given dataset $S$ of $n$
                 elements, the problem requires to determine a subset of
                 $S$ containing $ k \ll n$ ``representatives'' which
                 maximize some diversity function expressed in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "60",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Nguyen:2020:EEC,
  author =       "Hung Nguyen and Xuejian Wang and Leman Akoglu",
  title =        "End-to-End Continual Rare-Class Recognition with
                 Emerging Novel Subclasses",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "61:1--61:28",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3399660",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3399660",
  abstract =     "Given a labeled dataset that contains a rare (or
                 minority) class containing of-interest instances, as
                 well as a large class of instances that are not of
                 interest, how can we learn to recognize future
                 of-interest instances over a continuous stream? The
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "61",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2020:EMO,
  author =       "Tingting Wang and Lei Duan and Guozhu Dong and Zhifeng
                 Bao",
  title =        "Efficient Mining of Outlying Sequence Patterns for
                 Analyzing Outlierness of Sequence Data",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "62:1--62:26",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3399671",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3399671",
  abstract =     "Recently, a lot of research work has been proposed in
                 different domains to detect outliers and analyze the
                 outlierness of outliers for relational data. However,
                 while sequence data is ubiquitous in real life,
                 analyzing the outlierness for sequence data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "62",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Rossi:2020:PSR,
  author =       "Ryan A. Rossi and Di Jin and Sungchul Kim and Nesreen
                 K. Ahmed and Danai Koutra and John Boaz Lee",
  title =        "On Proximity and Structural Role-based Embeddings in
                 Networks: Misconceptions, Techniques, and
                 Applications",
  journal =      j-TKDD,
  volume =       "14",
  number =       "5",
  pages =        "63:1--63:37",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3397191",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Aug 28 11:59:01 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3397191",
  abstract =     "Structural roles define sets of structurally similar
                 nodes that are more similar to nodes inside the set
                 than outside, whereas communities define sets of nodes
                 with more connections inside the set than outside.
                 Roles based on structural similarity and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "63",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Nikolakopoulos:2020:BIB,
  author =       "Athanasios N. Nikolakopoulos and George Karypis",
  title =        "Boosting Item-based Collaborative Filtering via Nearly
                 Uncoupled Random Walks",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "64:1--64:26",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3406241",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3406241",
  abstract =     "Item-based models are among the most popular
                 collaborative filtering approaches for building
                 recommender systems. Random walks can provide a
                 powerful tool for harvesting the rich network of
                 interactions captured within these models. They can
                 exploit \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "64",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xu:2020:NAB,
  author =       "Jiarong Xu and Yifan Luo and Jianrong Tao and Changjie
                 Fan and Zhou Zhao and Jiangang Lu",
  title =        "{NGUARD+}: an Attention-based Game Bot Detection
                 Framework via Player Behavior Sequences",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "65:1--65:24",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3399711",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3399711",
  abstract =     "Game bots are automated programs that assist cheating
                 users, leading to an imbalance in the game ecosystem
                 and the collapse of user interest. Online games provide
                 immersive gaming experience and attract many loyal
                 fans. However, game bots have \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "65",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Guo:2020:IMS,
  author =       "Jianxiong Guo and Weili Wu",
  title =        "Influence Maximization: Seeding Based on Community
                 Structure",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "66:1--66:22",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3399661",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3399661",
  abstract =     "Influence maximization problem attempts to find a
                 small subset of nodes in a social network that makes
                 the expected influence maximized, which has been
                 researched intensively before. Most of the existing
                 literature focus only on maximizing total \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "66",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Hu:2020:EUP,
  author =       "Renjun Hu and Yanchi Liu and Yanyan Li and Jingbo Zhou
                 and Shuai Ma and Hui Xiong",
  title =        "Exploiting User Preference and Mobile Peer Influence
                 for Human Mobility Annotation",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "67:1--67:18",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3406600",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3406600",
  abstract =     "Human mobility annotation aims to assign mobility
                 records the corresponding visiting Point-of-Interests
                 (POIs). It is one of the most fundamental problems for
                 understanding human mobile behaviors. In literature,
                 many efforts have been devoted to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "67",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Pang:2020:HUO,
  author =       "Guansong Pang and Longbing Cao",
  title =        "Heterogeneous Univariate Outlier Ensembles in
                 Multidimensional Data",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "68:1--68:27",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3403934",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3403934",
  abstract =     "In outlier detection, recent major research has
                 shifted from developing univariate methods to
                 multivariate methods due to the rapid growth of
                 multidimensional data. However, one typical issue of
                 this paradigm shift is that many multidimensional data
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "68",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zamzami:2020:PMF,
  author =       "Nuha Zamzami and Nizar Bouguila",
  title =        "Probabilistic Modeling for Frequency Vectors Using a
                 Flexible Shifted-Scaled {Dirichlet} Distribution
                 Prior",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "69:1--69:35",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3406242",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3406242",
  abstract =     "Burstiness and overdispersion phenomena of count
                 vectors pose significant challenges in modeling such
                 data accurately. While the dependency assumption of the
                 multinomial distribution causes its failure to model
                 frequency vectors in several machine \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "69",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Paudel:2020:ACD,
  author =       "Ramesh Paudel and William Eberle",
  title =        "An Approach For Concept Drift Detection in a Graph
                 Stream Using Discriminative Subgraphs",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "70:1--70:25",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3406243",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3406243",
  abstract =     "The emergence of mining complex networks like social
                 media, sensor networks, and the world-wide-web has
                 attracted considerable research interest. In a
                 streaming scenario, the concept to be learned can
                 change over time. However, while there has been some
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "70",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Mohotti:2020:EOD,
  author =       "Wathsala Anupama Mohotti and Richi Nayak",
  title =        "Efficient Outlier Detection in Text Corpus Using Rare
                 Frequency and Ranking",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "71:1--71:30",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3399712",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3399712",
  abstract =     "Outlier detection in text data collections has become
                 significant due to the need of finding anomalies in the
                 myriad of text data sources. High feature
                 dimensionality, together with the larger size of these
                 document collections, presents a need for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "71",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2020:TWS,
  author =       "Chen Zhang and Steven C. H. Hoi and Fugee Tsung",
  title =        "Time-Warped Sparse Non-negative Factorization for
                 Functional Data Analysis",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "72:1--72:23",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3408313",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3408313",
  abstract =     "This article proposes a novel time-warped sparse
                 non-negative factorization method for functional data
                 analysis. The proposed method on the one hand
                 guarantees the extracted basis functions and their
                 coefficients to be positive and interpretable, and on
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "72",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Matheny:2020:SSS,
  author =       "Michael Matheny and Dong Xie and Jeff M. Phillips",
  title =        "Scalable Spatial Scan Statistics for Trajectories",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "73:1--73:24",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3394046",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3394046",
  abstract =     "We define several new models for how to define
                 anomalous regions among enormous sets of trajectories.
                 These are based on spatial scan statistics, and
                 identify a geometric region which captures a subset of
                 trajectories which are significantly different
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "73",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2020:BDR,
  author =       "Shuangyin Li and Yu Zhang and Rong Pan",
  title =        "Bi-Directional Recurrent Attentional Topic Model",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "74:1--74:30",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3412371",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3412371",
  abstract =     "In a document, the topic distribution of a sentence
                 depends on both the topics of its neighbored sentences
                 and its own content, and it is usually affected by the
                 topics of the neighbored sentences with different
                 weights. The neighbored sentences of a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "74",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Guo:2020:RAL,
  author =       "Jipeng Guo and Yanfeng Sun and Junbin Gao and Yongli
                 Hu and Baocai Yin",
  title =        "Robust Adaptive Linear Discriminant Analysis with
                 Bidirectional Reconstruction Constraint",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "75:1--75:20",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3409478",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3409478",
  abstract =     "Linear discriminant analysis (LDA) is a well-known
                 supervised method for dimensionality reduction in which
                 the global structure of data can be preserved. The
                 classical LDA is sensitive to the noises, and the
                 projection direction of LDA cannot preserve \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "75",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Savva:2020:LSD,
  author =       "Fotis Savva and Christos Anagnostopoulos and Peter
                 Triantafillou and Kostas Kolomvatsos",
  title =        "Large-scale Data Exploration Using Explanatory
                 Regression Functions",
  journal =      j-TKDD,
  volume =       "14",
  number =       "6",
  pages =        "76:1--76:33",
  month =        oct,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3410448",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Oct 8 06:52:44 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3410448",
  abstract =     "Analysts wishing to explore multivariate data spaces,
                 typically issue queries involving selection operators,
                 i.e., range or equality predicates, which define data
                 subspaces of potential interest. Then, they use
                 aggregation functions, the results of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "76",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ma:2020:RRT,
  author          = {Qian Ma and Yu Gu and Wang-Chien Lee and Ge Yu and
                     Hongbo Liu and Xindong Wu},
  title           = {{REMIAN}: Real-Time and Error-Tolerant Missing Value
                     Imputation},
  journal         = j-TKDD,
  volume          = {14},
  number          = {6},
  pages           = {77:1--77:38},
  month           = oct,
  year            = {2020},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3412364},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Thu Oct 8 06:52:44 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3412364},
  abstract        = {Missing value (MV) imputation is a critical
                     preprocessing means for data mining. Nevertheless,
                     existing MV imputation methods are mostly designed for
                     batch processing, and thus are not applicable to
                     streaming data, especially those with poor quality. In
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {77},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2021:HPR,
  author          = {Hao Wang and Shuai Ding and Yeqing Li and Xiaojian Li
                     and Youtao Zhang},
  title           = {Hierarchical Physician Recommendation via
                     Diversity-enhanced Matrix Factorization},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {1:1--1:17},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3418227},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3418227},
  abstract        = {Recent studies have shown that there exhibits
                     significantly imbalanced medical resource allocation
                     across public hospitals. Patients, regardless of their
                     diseases, tend to choose hospitals and physicians with
                     a better reputation, which often overloads \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {1},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Galimberti:2021:SCD,
  author          = {Edoardo Galimberti and Martino Ciaperoni and Alain
                     Barrat and Francesco Bonchi and Ciro Cattuto and
                     Francesco Gullo},
  title           = {Span-core Decomposition for Temporal Networks:
                     Algorithms and Applications},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {2:1--2:44},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3418226},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3418226},
  abstract        = {When analyzing temporal networks, a fundamental task
                     is the identification of dense structures (i.e., groups
                     of vertices that exhibit a large number of links),
                     together with their temporal span (i.e., the period of
                     time for which the high density holds). \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {2},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Huang:2021:DGM,
  author          = {Yu Huang and Josh Jia-Ching Ying and Philip S. Yu and
                     Vincent S. Tseng},
  title           = {Dynamic Graph Mining for Multi-weight
                     Multi-destination Route Planning with Deadlines
                     Constraints},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {3:1--3:32},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3412363},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3412363},
  abstract        = {Route planning satisfied multiple requests is an
                     emerging branch in the route planning field and has
                     attracted significant attention from the research
                     community in recent years. The prevailing studies focus
                     only on seeking a route by minimizing a single
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {3},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Siers:2021:CIC,
  author          = {Michael J. Siers and Md Zahidul Islam},
  title           = {Class Imbalance and Cost-Sensitive Decision Trees: a
                     Unified Survey Based on a Core Similarity},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {4:1--4:31},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3415156},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3415156},
  abstract        = {Class imbalance treatment methods and cost-sensitive
                     classification algorithms are typically treated as two
                     independent research areas. However, many of these
                     techniques have properties in common. After providing a
                     background to the two fields of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {4},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Huang:2021:MSN,
  author          = {Hong Huang and Yu Song and Fanghua Ye and Xing Xie and
                     Xuanhua Shi and Hai Jin},
  title           = {Multi-Stage Network Embedding for Exploring
                     Heterogeneous Edges},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {5:1--5:27},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3415157},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3415157},
  abstract        = {The relationships between objects in a network are
                     typically diverse and complex, leading to the
                     heterogeneous edges with different semantic
                     information. In this article, we focus on exploring the
                     heterogeneous edges for network representation
                     learning. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {5},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Hu:2021:RTR,
  author          = {Yue Hu and Daniel B. Work},
  title           = {Robust Tensor Recovery with Fiber Outliers for Traffic
                     Events},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {6:1--6:27},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3417337},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3417337},
  abstract        = {Event detection is gaining increasing attention in
                     smart cities research. Large-scale mobility data serves
                     as an important tool to uncover the dynamics of urban
                     transportation systems, and more often than not the
                     dataset is incomplete. In this article, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {6},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhu:2021:ARD,
  author          = {Xiaoyan Zhu and Yingbin Li and Jiayin Wang and Tian
                     Zheng and Jingwen Fu},
  title           = {Automatic Recommendation of a Distance Measure for
                     Clustering Algorithms},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {7:1--7:22},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3418228},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3418228},
  abstract        = {With a large number of distance measures, the
                     appropriate choice for clustering a given data set with
                     a specified clustering algorithm becomes an important
                     problem. In this article, an automatic distance measure
                     recommendation method for clustering \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {7},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Bernardini:2021:CAS,
  author          = {Giulia Bernardini and Huiping Chen and Alessio Conte
                     and Roberto Grossi and Grigorios Loukides and Nadia
                     Pisanti and Solon P. Pissis and Giovanna Rosone and
                     Michelle Sweering},
  title           = {Combinatorial Algorithms for String Sanitization},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {8:1--8:34},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3418683},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3418683},
  abstract        = {String data are often disseminated to support
                     applications such as location-based service provision
                     or DNA sequence analysis. This dissemination, however,
                     may expose sensitive patterns that model confidential
                     knowledge (e.g., trips to mental health \ldots{}).},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {8},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Rossi:2021:HG,
  author          = {Ryan A. Rossi and Nesreen K. Ahmed and Aldo Carranza
                     and David Arbour and Anup Rao and Sungchul Kim and
                     Eunyee Koh},
  title           = {Heterogeneous Graphlets},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {9:1--9:43},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3418773},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3418773},
  abstract        = {In this article, we introduce a generalization of
                     graphlets to heterogeneous networks called typed
                     graphlets. Informally, typed graphlets are small typed
                     induced subgraphs. Typed graphlets generalize graphlets
                     to rich heterogeneous networks as they \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {9},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Ji:2021:ALS,
  author          = {Yugang Ji and Mingyang Yin and Hongxia Yang and
                     Jingren Zhou and Vincent W. Zheng and Chuan Shi and
                     Yuan Fang},
  title           = {Accelerating Large-Scale Heterogeneous Interaction
                     Graph Embedding Learning via Importance Sampling},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {10:1--10:23},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3418684},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3418684},
  abstract        = {In real-world problems, heterogeneous entities are
                     often related to each other through multiple
                     interactions, forming a Heterogeneous Interaction Graph
                     (HIG). While modeling HIGs to deal with fundamental
                     tasks, graph neural networks present an \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {10},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Xun:2021:MPI,
  author          = {Guangxu Xun and Kishlay Jha and Aidong Zhang},
  title           = {{MeSHProbeNet-P}: Improving Large-scale {MeSH}
                     Indexing with Personalizable {MeSH} Probes},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {11:1--11:14},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3421713},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3421713},
  abstract        = {Indexing biomedical research articles with Medical
                     Subject Headings (MeSH) can greatly facilitate
                     biomedical research and information retrieval.
                     Currently MeSH indexing is performed by human experts.
                     To alleviate the time consumption and monetary cost
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {11},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Tu:2021:CCJ,
  author          = {Jinzheng Tu and Guoxian Yu and Jun Wang and Carlotta
                     Domeniconi and Maozu Guo and Xiangliang Zhang},
  title           = {{CrowdWT}: Crowdsourcing via Joint Modeling of Workers
                     and Tasks},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {12:1--12:24},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3421712},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3421712},
  abstract        = {Crowdsourcing is a relatively inexpensive and
                     efficient mechanism to collect annotations of data from
                     the open Internet. Crowdsourcing workers are paid for
                     the provided annotations, but the task requester
                     usually has a limited budget. It is desirable to
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {12},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Azevedo:2021:RNT,
  author          = {Ricardo {De Azevedo} and Gabriel Resende Machado and
                     Ronaldo Ribeiro Goldschmidt and Ricardo Choren},
  title           = {A Reduced Network Traffic Method for {IoT} Data
                     Clustering},
  journal         = j-TKDD,
  volume          = {15},
  number          = {1},
  pages           = {13:1--13:23},
  month           = jan,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3423139},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Mar 28 09:45:00 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3423139},
  abstract        = {Internet of Things (IoT) systems usually involve
                     interconnected, low processing capacity, and low memory
                     sensor nodes (devices) that collect data in several
                     sorts of applications that interconnect people and
                     things. In this scenario, mining tasks, such \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {13},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Rossi:2021:KGE,
  author          = {Andrea Rossi and Denilson Barbosa and Donatella
                     Firmani and Antonio Matinata and Paolo Merialdo},
  title           = {Knowledge Graph Embedding for Link Prediction: a
                     Comparative Analysis},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {14:1--14:49},
  month           = apr,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3424672},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3424672},
  abstract        = {Knowledge Graphs (KGs) have found many applications in
                     industrial and in academic settings, which in turn,
                     have motivated considerable research efforts towards
                     large-scale information extraction from a variety of
                     sources. Despite such efforts, it is \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {14},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Amornbunchornvej:2021:ILM,
  author          = {Chainarong Amornbunchornvej and Navaporn Surasvadi and
                     Anon Plangprasopchok and Suttipong Thajchayapong},
  title           = {Identifying Linear Models in Multi-Resolution
                     Population Data Using Minimum Description Length
                     Principle to Predict Household Income},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {15:1--15:30},
  month           = apr,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3424670},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3424670},
  abstract        = {One shirt size cannot fit everybody, while we cannot
                     make a unique shirt that fits perfectly for everyone
                     because of resource limitations. This analogy is true
                     for policy making as well. Policy makers cannot make a
                     single policy to solve all problems \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {15},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Feng:2021:RSP,
  author          = {Yi Feng and Chuanyi Li and Jidong Ge and Bin Luo and
                     Vincent Ng},
  title           = {Recommending Statutes: a Portable Method Based on
                     Neural Networks},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {16:1--16:22},
  month           = apr,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3424671},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3424671},
  abstract        = {Legal judgment prediction, which aims at predicting
                     judgment results such as penalty, charges, and statutes
                     for cases, has attracted much attention recently. In
                     this article, we focus on building a recommender system
                     to predict the associated statutes \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {16},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2021:HNH,
  author          = {Yashen Wang and Huanhuan Zhang},
  title           = {{HARP}: a Novel Hierarchical Attention Model for
                     Relation Prediction},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {17:1--17:22},
  month           = apr,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3424673},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3424673},
  abstract        = {Recent years have witnessed great advancement of
                     representation learning (RL)-based models for the
                     knowledge graph relation prediction task. However, they
                     generally rely on structure information embedded in the
                     encyclopedic knowledge graph, while the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {17},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhou:2021:HOS,
  author          = {Dawei Zhou and Si Zhang and Mehmet Yigit Yildirim and
                     Scott Alcorn and Hanghang Tong and Hasan Davulcu and
                     Jingrui He},
  title           = {High-Order Structure Exploration on Massive Graphs: a
                     Local Graph Clustering Perspective},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {18:1--18:26},
  month           = apr,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3425637},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3425637},
  abstract        = {Modeling and exploring high-order connectivity
                     patterns, also called network motifs, are essential for
                     understanding the fundamental structures that control
                     and mediate the behavior of many complex systems. For
                     example, in social networks, triangles \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {18},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Li:2021:EFM,
  author          = {Chongshou Li and Brenda Cheang and Zhixing Luo and
                     Andrew Lim},
  title           = {An Exponential Factorization Machine with Percentage
                     Error Minimization to Retail Sales Forecasting},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {19:1--19:32},
  month           = apr,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3426238},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3426238},
  abstract        = {This article proposes a new approach to sales
                     forecasting for new products (stock-keeping units
                     [SKUs]) with long lead time but short product life
                     cycle. These SKUs are usually sold for one season only,
                     without any replenishments. An exponential \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {19},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Djenouri:2021:TOD,
  author          = {Youcef Djenouri and Djamel Djenouri and Jerry Chun-Wei
                     Lin},
  title           = {Trajectory Outlier Detection: New Problems and
                     Solutions for Smart Cities},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {20:1--20:28},
  month           = apr,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3425867},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3425867},
  abstract        = {This article introduces two new problems related to
                     trajectory outlier detection: (1) group trajectory
                     outlier (GTO) detection and (2) deviation point
                     detection for both individual and group of trajectory
                     outliers. Five algorithms are proposed for the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {20},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2021:SSR,
  author          = {Kafeng Wang and Haoyi Xiong and Jiang Bian and
                     Zhanxing Zhu and Qian Gao and Zhishan Guo and
                     Cheng-Zhong Xu and Jun Huan and Dejing Dou},
  title           = {Sampling Sparse Representations with Randomized
                     Measurement {Langevin} Dynamics},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {21:1--21:21},
  month           = apr,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3427585},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3427585},
  abstract        = {Stochastic Gradient Langevin Dynamics (SGLD) have been
                     widely used for Bayesian sampling from certain
                     probability distributions, incorporating derivatives of
                     the log-posterior. With the derivative evaluation of
                     the log-posterior distribution, SGLD \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {21},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Belohlavek:2021:ATP,
  author =       "Radim Belohlavek and Martin Trnecka",
  title =        "The {8M} Algorithm from Today's Perspective",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "22:1--22:22",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3428078",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3428078",
  abstract =     "We provide a detailed analysis and a first complete
                 description of 8M---an old but virtually unknown
                 algorithm for Boolean matrix factorization. Even though
                 the algorithm uses a rather limited insight into the
                 factorization problem from today's \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "22",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xu:2021:CIN,
  author          = {En Xu and Zhiwen Yu and Bin Guo and Helei Cui},
  title           = {Core Interest Network for Click-Through Rate
                     Prediction},
  journal         = j-TKDD,
  volume          = {15},
  number          = {2},
  pages           = {23:1--23:16},
  month           = apr,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3428079},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sun Apr 11 08:38:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3428079},
  abstract        = {In modern online advertising systems, the
                     click-through rate (CTR) is an important index to
                     measure the popularity of an item. It refers to the
                     ratio of users who click on a specific advertisement to
                     the number of total users who view it. Predicting the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {23},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Ghosh:2021:CBE,
  author =       "Aindrila Ghosh and Mona Nashaat and James Miller and
                 Shaikh Quader",
  title =        "Context-Based Evaluation of Dimensionality Reduction
                 Algorithms --- Experiments and Statistical Significance
                 Analysis",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "24:1--24:40",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3428077",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3428077",
  abstract =     "Dimensionality reduction is a commonly used technique
                 in data analytics. Reducing the dimensionality of
                 datasets helps not only with managing their analytical
                 complexity but also with removing redundancy. Over the
                 years, several such algorithms have \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "24",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2021:HNA,
  author =       "Shikang Liu and Fatemeh Vahedian and David Hachen and
                 Omar Lizardo and Christian Poellabauer and Aaron
                 Striegel and Tijana Milenkovi{\'c}",
  title =        "Heterogeneous Network Approach to Predict Individuals'
                 Mental Health",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "25:1--25:26",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3429446",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3429446",
  abstract =     "Depression and anxiety are critical public health
                 issues affecting millions of people around the world.
                 To identify individuals who are vulnerable to
                 depression and anxiety, predictive models have been
                 built that typically utilize data from one source.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "25",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhou:2021:UMF,
  author =       "Zhengze Zhou and Giles Hooker",
  title =        "Unbiased Measurement of Feature Importance in
                 Tree-Based Methods",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "26:1--26:21",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3429445",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3429445",
  abstract =     "We propose a modification that corrects for
                 split-improvement variable importance measures in
                 Random Forests and other tree-based methods. These
                 methods have been shown to be biased towards increasing
                 the importance of features with more potential
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "26",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Almeida:2021:MCB,
  author =       "Matthew Almeida and Yong Zhuang and Wei Ding and Scott
                 E. Crouter and Ping Chen",
  title =        "Mitigating Class-Boundary Label Uncertainty to Reduce
                 Both Model Bias and Variance",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "27:1--27:18",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3429447",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3429447",
  abstract =     "The study of model bias and variance with respect to
                 decision boundaries is critically important in
                 supervised learning and artificial intelligence. There
                 is generally a tradeoff between the two, as fine-tuning
                 of the decision boundary of a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "27",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Munoz:2021:ISA,
  author =       "Mario Andr{\'e}s Mu{\~n}oz and Tao Yan and Matheus R.
                 Leal and Kate Smith-Miles and Ana Carolina Lorena and
                 Gisele L. Pappa and R{\^o}mulo Madureira Rodrigues",
  title =        "An Instance Space Analysis of Regression Problems",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "28:1--28:25",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3436893",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3436893",
  abstract =     "The quest for greater insights into algorithm
                 strengths and weaknesses, as revealed when studying
                 algorithm performance on large collections of test
                 problems, is supported by interactive visual analytics
                 tools. A recent advance is Instance Space \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Coscia:2021:NCS,
  author =       "Michele Coscia",
  title =        "Noise Corrected Sampling of Online Social Networks",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "29:1--29:21",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434749",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434749",
  abstract =     "In this article, we propose a new method to perform
                 topological network sampling. Topological network
                 sampling is a process for extracting a subset of nodes
                 and edges from a network, such that analyses on the
                 sample provide results and conclusions \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "29",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Steinbuss:2021:GAO,
  author =       "Georg Steinbuss and Klemens B{\"o}hm",
  title =        "Generating Artificial Outliers in the Absence of
                 Genuine Ones --- A Survey",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "30:1--30:37",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3447822",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3447822",
  abstract =     "By definition, outliers are rarely observed in
                 reality, making them difficult to detect or analyze.
                 Artificial outliers approximate such genuine outliers
                 and can, for instance, help with the detection of
                 genuine outliers or with benchmarking outlier-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "30",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhu:2021:SCS,
  author =       "Yi Zhu and Lei Li and Xindong Wu",
  title =        "Stacked Convolutional Sparse Auto-Encoders for
                 Representation Learning",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "31:1--31:21",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434767",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434767",
  abstract =     "Deep learning seeks to achieve excellent performance
                 for representation learning in image datasets. However,
                 supervised deep learning models such as convolutional
                 neural networks require a large number of labeled image
                 data, which is intractable in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "31",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Sun:2021:JTD,
  author =       "Bin Sun and Dehui Kong and Shaofan Wang and Lichun
                 Wang and Baocai Yin",
  title =        "Joint Transferable Dictionary Learning and View
                 Adaptation for Multi-view Human Action Recognition",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "32:1--32:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434746",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434746",
  abstract =     "Multi-view human action recognition remains a
                 challenging problem due to large view changes. In this
                 article, we propose a transfer learning-based framework
                 called transferable dictionary learning and view
                 adaptation (TDVA) model for multi-view human \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "32",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Garciarena:2021:TAC,
  author =       "Unai Garciarena and Alexander Mendiburu and Roberto
                 Santana",
  title =        "Towards Automatic Construction of Multi-Network Models
                 for Heterogeneous Multi-Task Learning",
  journal =      j-TKDD,
  volume =       "15",
  number =       "2",
  pages =        "33:1--33:23",
  month =        apr,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434748",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sun Apr 11 08:38:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434748",
  abstract =     "Multi-task learning, as it is understood nowadays,
                 consists of using one single model to carry out several
                 similar tasks. From classifying hand-written characters
                 of different alphabets to figuring out how to play
                 several Atari games using reinforcement \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "33",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ying:2021:IKB,
  author =       "Shi Ying and Bingming Wang and Lu Wang and Qingshan Li
                 and Yishi Zhao and Jianga Shang and Hao Huang and Guoli
                 Cheng and Zhe Yang and Jiangyi Geng",
  title =        "An Improved {KNN}-Based Efficient Log Anomaly
                 Detection Method with Automatically Labeled Samples",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "34:1--34:22",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441448",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441448",
  abstract =     "Logs that record system abnormal states (anomaly logs)
                 can be regarded as outliers, and the k-Nearest Neighbor
                 (kNN) algorithm has relatively high accuracy in outlier
                 detection methods. Therefore, we use the kNN algorithm
                 to detect anomalies in the log \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "34",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Salve:2021:PIU,
  author =       "Andrea {De Salve} and Paolo Mori and Barbara Guidi and
                 Laura Ricci and Roberto {Di Pietro}",
  title =        "Predicting Influential Users in Online Social Network
                 Groups",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "35:1--35:50",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441447",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441447",
  abstract =     "The widespread adoption of Online Social Networks
                 (OSNs), the ever-increasing amount of information
                 produced by their users, and the corresponding capacity
                 to influence markets, politics, and society, have led
                 both industrial and academic researchers to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "35",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xie:2021:UPC,
  author =       "Hong Xie and Mingze Zhong and Yongkun Li and John C.
                 S. Lui",
  title =        "Understanding Persuasion Cascades in Online Product
                 Rating Systems: Modeling, Analysis, and Inference",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "36:1--36:29",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3440887",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3440887",
  abstract =     "Online product rating systems have become an
                 indispensable component for numerous web services such
                 as Amazon, eBay, Google Play Store, and TripAdvisor.
                 One functionality of such systems is to uncover the
                 product quality via product ratings (or reviews)
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "36",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2021:POP,
  author =       "Zheng Zhang and Xiaofeng Zhu and Guangming Lu and
                 Yudong Zhang",
  title =        "Probability Ordinal-Preserving Semantic Hashing for
                 Large-Scale Image Retrieval",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "37:1--37:22",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442204",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442204",
  abstract =     "Semantic hashing enables computation and
                 memory-efficient image retrieval through learning
                 similarity-preserving binary representations. Most
                 existing hashing methods mainly focus on preserving the
                 piecewise class information or pairwise correlations of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "37",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Shin:2021:CFA,
  author =       "Kijung Shin and Euiwoong Lee and Jinoh Oh and Mohammad
                 Hammoud and Christos Faloutsos",
  title =        "{CoCoS}: Fast and Accurate Distributed Triangle
                 Counting in Graph Streams",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "38:1--38:30",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441487",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441487",
  abstract =     "Given a graph stream, how can we estimate the number
                 of triangles in it using multiple machines with limited
                 storage? Specifically, how should edges be processed
                 and sampled across the machines for rapid and accurate
                 estimation? The count of triangles \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ata:2021:MVC,
  author =       "Sezin Kircali Ata and Yuan Fang and Min Wu and Jiaqi
                 Shi and Chee Keong Kwoh and Xiaoli Li",
  title =        "Multi-View Collaborative Network Embedding",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "39:1--39:18",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441450",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441450",
  abstract =     "Real-world networks often exist with multiple views,
                 where each view describes one type of interaction among
                 a common set of nodes. For example, on a video-sharing
                 network, while two user nodes are linked, if they have
                 common favorite videos in one view, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "39",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2021:SVR,
  author =       "Wei Wang and Feng Xia and Jian Wu and Zhiguo Gong and
                 Hanghang Tong and Brian D. Davison",
  title =        "{Scholar2vec}: Vector Representation of Scholars for
                 Lifetime Collaborator Prediction",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "40:1--40:19",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442199",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442199",
  abstract =     "While scientific collaboration is critical for a
                 scholar, some collaborators can be more significant
                 than others, e.g., lifetime collaborators. It has been
                 shown that lifetime collaborators are more influential
                 on a scholar's academic performance. However,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "40",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bai:2021:TTG,
  author =       "Luyi Bai and Xiangnan Ma and Mingcheng Zhang and
                 Wenting Yu",
  title =        "{TPmod}: a Tendency-Guided Prediction Model for
                 Temporal Knowledge Graph Completion",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "41:1--41:17",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3443687",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3443687",
  abstract =     "Temporal knowledge graphs (TKGs) have become useful
                 resources for numerous Artificial Intelligence
                 applications, but they are far from completeness.
                 Inferring missing events in temporal knowledge graphs
                 is a fundamental and challenging task. However, most
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "41",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2021:RCE,
  author =       "Jingjing Wang and Wenjun Jiang and Kenli Li and Keqin
                 Li",
  title =        "Reducing Cumulative Errors of Incremental {CP}
                 Decomposition in Dynamic Online Social Networks",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "42:1--42:33",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441645",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441645",
  abstract =     "CANDECOMP/PARAFAC (CP) decomposition is widely used in
                 various online social network (OSN) applications.
                 However, it is inefficient when dealing with massive
                 and incremental data. Some incremental CP decomposition
                 (ICP) methods have been proposed to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "42",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wu:2021:PGA,
  author =       "Guanhao Wu and Xiaofeng Gao and Ge Yan and Guihai
                 Chen",
  title =        "Parallel Greedy Algorithm to Multiple Influence
                 Maximization in Social Network",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "43:1--43:21",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442341",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442341",
  abstract =     "Influence Maximization (IM) problem is to select
                 influential users to maximize the influence spread,
                 which plays an important role in many real-world
                 applications such as product recommendation, epidemic
                 control, and network monitoring. Nowadays multiple
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "43",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2021:EDR,
  author =       "Lei Yang and Xi Yu and Jiannong Cao and Xuxun Liu and
                 Pan Zhou",
  title =        "Exploring Deep Reinforcement Learning for Task
                 Dispatching in Autonomous On-Demand Services",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "44:1--44:23",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442343",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442343",
  abstract =     "Autonomous on-demand services, such as GOGOX (formerly
                 GoGoVan) in Hong Kong, provide a platform for users to
                 request services and for suppliers to meet such
                 demands. In such a platform, the suppliers have
                 autonomy to accept or reject the demands to be
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Cheng:2021:GLG,
  author =       "Lin Cheng and Yuliang Shi and Kun Zhang and Xinjun
                 Wang and Zhiyong Chen",
  title =        "{GGATB-LSTM}: Grouping and Global Attention-based
                 Time-aware Bidirectional {LSTM} Medical Treatment
                 Behavior Prediction",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "45:1--45:16",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441454",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441454",
  abstract =     "In China, with the continuous development of national
                 health insurance policies, more and more people have
                 joined the health insurance. How to accurately predict
                 patients' future medical treatment behavior becomes a
                 hotspot issue. The biggest challenge in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "45",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2021:SRS,
  author =       "Xueyan Liu and Bo Yang and Hechang Chen and Katarzyna
                 Musial and Hongxu Chen and Yang Li and Wanli Zuo",
  title =        "A Scalable Redefined Stochastic Blockmodel",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "46:1--46:28",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442589",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442589",
  abstract =     "Stochastic blockmodel (SBM) is a widely used
                 statistical network representation model, with good
                 interpretability, expressiveness, generalization, and
                 flexibility, which has become prevalent and important
                 in the field of network science over the last
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "46",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2021:KTW,
  author =       "Yan Liu and Bin Guo and Daqing Zhang and Djamal
                 Zeghlache and Jingmin Chen and Ke Hu and Sizhe Zhang
                 and Dan Zhou and Zhiwen Yu",
  title =        "Knowledge Transfer with Weighted Adversarial Network
                 for Cold-Start Store Site Recommendation",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "47:1--47:27",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442203",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442203",
  abstract =     "Store site recommendation aims to predict the value of
                 the store at candidate locations and then recommend the
                 optimal location to the company for placing a new
                 brick-and-mortar store. Most existing studies focus on
                 learning machine learning or deep \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "47",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 15, number 3 (May 2021), article 48.
%%% NOTE(review): the abstract excerpt had lost its math symbol and part
%%% of a word during extraction; "$k$-vertex" (cf. the title) and
%%% "user-specified" were restored, and a sentence break was added after
%%% the epigraph attribution.  Confirm against the publisher's abstract.
@Article{Nasir:2021:TAM,
  author =       "Muhammad Anis Uddin Nasir and Cigdem Aslay and
                 Gianmarco {De Francisci Morales} and Matteo Riondato",
  title =        "{TipTap}: Approximate Mining of Frequent $k$-Subgraph
                 Patterns in Evolving Graphs",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "48:1--48:35",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442590",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442590",
  abstract =     "``Perhaps he could dance first and think afterwards,
                 if it isn't too much to ask him.'' S. Beckett, Waiting
                 for Godot. Given a labeled graph, the collection of
                 $k$-vertex induced connected subgraph patterns that
                 appear in the graph more frequently than a
                 user-specified \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "48",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 15, number 3 (May 2021), article 49.
@Article{Lin:2021:PIR,
  author          = {Chen Lin and Zhichao Ouyang and Xiaoli Wang and Hui Li
                     and Zhenhua Huang},
  title           = {Preserve Integrity in Realtime Event Summarization},
  journal         = j-TKDD,
  volume          = {15},
  number          = {3},
  pages           = {49:1--49:29},
  month           = may,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3442344},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed May 5 08:45:16 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3442344},
  abstract        = {Online text streams such as Twitter are the major
                     information source for users when they are looking for
                     ongoing events. Realtime event summarization aims to
                     generate and update coherent and concise summaries to
                     describe the state of a given event. Due to \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {49},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 3 (May 2021), article 50.
@Article{Jiang:2021:DLB,
  author          = {Jie Jiang and Qiuqiang Kong and Mark D. Plumbley and
                     Nigel Gilbert and Mark Hoogendoorn and Diederik M.
                     Roijers},
  title           = {Deep Learning-Based Energy Disaggregation and On\slash
                     Off Detection of Household Appliances},
  journal         = j-TKDD,
  volume          = {15},
  number          = {3},
  pages           = {50:1--50:21},
  month           = may,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441300},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed May 5 08:45:16 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441300},
  abstract        = {Energy disaggregation, a.k.a. Non-Intrusive Load
                     Monitoring, aims to separate the energy consumption of
                     individual appliances from the readings of a mains
                     power meter measuring the total energy consumption of,
                     e.g., a whole house. Energy consumption of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {50},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 3 (May 2021), article 51.
%%% NOTE(review): every math symbol had been stripped from the abstract
%%% excerpt ("Given two graphs and, ... where and, ... between and
%%% growing from,").  The symbols below were reconstructed as LaTeX math;
%%% confirm the exact notation against the publisher's abstract.
@Article{Zhang:2021:EHQ,
  author =       "Haida Zhang and Zengfeng Huang and Xuemin Lin and Zhe
                 Lin and Wenjie Zhang and Ying Zhang",
  title =        "Efficient and High-Quality Seeded Graph Matching:
                 Employing Higher-order Structural Information",
  journal =      j-TKDD,
  volume =       "15",
  number =       "3",
  pages =        "51:1--51:31",
  month =        may,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3442340",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 5 08:45:16 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3442340",
  abstract =     "Driven by many real applications, we study the problem
                 of seeded graph matching. Given two graphs
                 $G_1 = (V_1, E_1)$ and $G_2 = (V_2, E_2)$, and a small
                 set $S$ of pre-matched node pairs $[u, v]$ where
                 $u \in V_1$ and $v \in V_2$, the problem is to identify
                 a matching between $V_1$ and $V_2$ growing from $S$,
                 such that each pair in the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "51",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 15, number 3 (May 2021), article 52.
@Article{Barlaug:2021:NNE,
  author          = {Nils Barlaug and Jon Atle Gulla},
  title           = {Neural Networks for Entity Matching: a Survey},
  journal         = j-TKDD,
  volume          = {15},
  number          = {3},
  pages           = {52:1--52:37},
  month           = may,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3442200},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed May 5 08:45:16 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3442200},
  abstract        = {Entity matching is the problem of identifying which
                     records refer to the same real-world entity. It has
                     been actively researched for decades, and a variety of
                     different approaches have been developed. Even today,
                     it remains a challenging problem, and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {52},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 3 (May 2021), article 53.
@Article{Chen:2021:FCM,
  author          = {Chen Chen and Ruiyue Peng and Lei Ying and Hanghang
                     Tong},
  title           = {Fast Connectivity Minimization on Large-Scale
                     Networks},
  journal         = j-TKDD,
  volume          = {15},
  number          = {3},
  pages           = {53:1--53:25},
  month           = may,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3442342},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Wed May 5 08:45:16 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3442342},
  abstract        = {The connectivity of networks has been widely studied
                     in many high-impact applications, ranging from
                     immunization, critical infrastructure analysis, social
                     network mining, to bioinformatic system studies.
                     Regardless of the end application domains, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {53},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 54.
@Article{Wang:2021:LBC,
  author          = {Yunzhe Wang and George Baciu and Chenhui Li},
  title           = {A Layout-Based Classification Method for Visualizing
                     Time-Varying Graphs},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {54:1--54:24},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441301},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441301},
  abstract        = {Connectivity analysis between the components of large
                     evolving systems can reveal significant patterns of
                     interaction. The systems can be simulated by
                     topological graph structures. However, such analysis
                     becomes challenging on large and complex graphs.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {54},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 55.
@Article{Ouyang:2021:MAC,
  author          = {Yi Ouyang and Bin Guo and Xing Tang and Xiuqiang He
                     and Jian Xiong and Zhiwen Yu},
  title           = {Mobile App Cross-Domain Recommendation with
                     Multi-Graph Neural Network},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {55:1--55:21},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3442201},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3442201},
  abstract        = {With the rapid development of mobile app ecosystem,
                     mobile apps have grown greatly popular. The explosive
                     growth of apps makes it difficult for users to find
                     apps that meet their interests. Therefore, it is
                     necessary to recommend user with a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {55},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 56.
@Article{Dornaika:2021:EET,
  author          = {F. Dornaika},
  title           = {Elastic Embedding through Graph Convolution-based
                     Regression for Semi-supervised Classification},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {56:1--56:11},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441456},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441456},
  abstract        = {This article introduces a scheme for semi-supervised
                     learning by estimating a flexible non-linear data
                     representation that exploits Spectral Graph
                     Convolutions structure. Structured data are exploited
                     in order to determine non-linear and linear models.
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {56},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 57.
@Article{Li:2021:LTE,
  author          = {Yanni Li and Bing Liu and Yongbo Yu and Hui Li and
                     Jiacan Sun and Jiangtao Cui},
  title           = {{3E-LDA}: Three Enhancements to Linear Discriminant
                     Analysis},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {57:1--57:20},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3442347},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3442347},
  abstract        = {Linear discriminant analysis (LDA) is one of the
                     important techniques for dimensionality reduction,
                     machine learning, and pattern recognition. However, in
                     many applications, applying the classical LDA often
                     faces the following problems: (1) sensitivity
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {57},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 58.
@Article{Zang:2021:JMS,
  author          = {Tianzi Zang and Yanmin Zhu and Yanan Xu and Jiadi Yu},
  title           = {Jointly Modeling Spatio-Temporal Dependencies and
                     Daily Flow Correlations for Crowd Flow Prediction},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {58:1--58:20},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3439346},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3439346},
  abstract        = {Crowd flow prediction is a vital problem for an
                     intelligent transportation system construction in a
                     smart city. It plays a crucial role in traffic
                     management and behavioral analysis, thus it has raised
                     great attention from many researchers. However,
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {58},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 59.
@Article{Ahmed:2021:OST,
  author          = {Nesreen K. Ahmed and Nick Duffield and Ryan A. Rossi},
  title           = {Online Sampling of Temporal Networks},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {59:1--59:27},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3442202},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3442202},
  abstract        = {Temporal networks representing a stream of timestamped
                     edges are seemingly ubiquitous in the real world.
                     However, the massive size and continuous nature of
                     these networks make them fundamentally challenging to
                     analyze and leverage for descriptive and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {59},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 60.
@Article{Zhao:2021:SIF,
  author          = {Huan Zhao and Quanming Yao and Yangqiu Song and James
                     T. Kwok and Dik Lun Lee},
  title           = {Side Information Fusion for Recommender Systems over
                     Heterogeneous Information Network},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {60:1--60:32},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441446},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441446},
  abstract        = {Collaborative filtering (CF) has been one of the most
                     important and popular recommendation methods, which
                     aims at predicting users' preferences (ratings) based
                     on their past behaviors. Recently, various types of
                     side information beyond the explicit \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {60},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 61.
@Article{Zhang:2021:SEB,
  author          = {Daokun Zhang and Jie Yin and Xingquan Zhu and Chengqi
                     Zhang},
  title           = {Search Efficient Binary Network Embedding},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {61:1--61:27},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3436892},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3436892},
  abstract        = {Traditional network embedding primarily focuses on
                     learning a continuous vector representation for each
                     node, preserving network structure and/or node content
                     information, such that off-the-shelf machine learning
                     algorithms can be easily applied to the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {61},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 62.
@Article{Song:2021:NEH,
  author          = {Guojie Song and Yun Wang and Lun Du and Yi Li and
                     Junshan Wang},
  title           = {Network Embedding on Hierarchical Community Structure
                     Network},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {62:1--62:23},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3434747},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3434747},
  abstract        = {Network embedding is a method of learning a
                     low-dimensional vector representation of network
                     vertices under the condition of preserving different
                     types of network properties. Previous studies mainly
                     focus on preserving structural information of vertices
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {62},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 63.
@Article{Yu:2021:UVC,
  author          = {Kui Yu and Lin Liu and Jiuyong Li},
  title           = {A Unified View of Causal and Non-causal Feature
                     Selection},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {63:1--63:46},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3436891},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3436891},
  abstract        = {In this article, we aim to develop a unified view of
                     causal and non-causal feature selection methods. The
                     unified view will fill in the gap in the research of
                     the relation between the two types of methods. Based on
                     the Bayesian network framework and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {63},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 64.
%%% The abstract excerpt's misspelling "neighhorhood" is corrected to
%%% "neighborhood".
@Article{Yin:2021:RIR,
  author =       "Shuai Yin and Yanfeng Sun and Junbin Gao and Yongli Hu
                 and Boyue Wang and Baocai Yin",
  title =        "Robust Image Representation via Low Rank Locality
                 Preserving Projection",
  journal =      j-TKDD,
  volume =       "15",
  number =       "4",
  pages =        "64:1--64:22",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3434768",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Jun 19 06:16:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3434768",
  abstract =     "Locality preserving projection (LPP) is a
                 dimensionality reduction algorithm preserving the
                 neighborhood graph structure of data. However, the
                 conventional LPP is sensitive to outliers existing in
                 data. This article proposes a novel low-rank LPP model
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "64",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 15, number 4 (June 2021), article 65.
@Article{Steinbuss:2021:BUO,
  author          = {Georg Steinbuss and Klemens B{\"o}hm},
  title           = {Benchmarking Unsupervised Outlier Detection with
                     Realistic Synthetic Data},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {65:1--65:20},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441453},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441453},
  abstract        = {Benchmarking unsupervised outlier detection is
                     difficult. Outliers are rare, and existing benchmark
                     data contains outliers with various and unknown
                     characteristics. Fully synthetic data usually consists
                     of outliers and regular instances with clear \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {65},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 66.
@Article{Lin:2021:SEK,
  author          = {Mingkai Lin and Wenzhong Li and Lynda J. Song and
                     Cam-Tu Nguyen and Xiaoliang Wang and Sanglu Lu},
  title           = {{SAKE}: Estimating {Katz} Centrality Based on Sampling
                     for Large-Scale Social Networks},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {66:1--66:21},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441646},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441646},
  abstract        = {Katz centrality is a fundamental concept to measure
                     the influence of a vertex in a social network. However,
                     existing approaches to calculating Katz centrality in a
                     large-scale network are unpractical and computationally
                     expensive. In this article, we \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {66},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 67.
@Article{Amornbunchornvej:2021:VLG,
  author          = {Chainarong Amornbunchornvej and Elena Zheleva and
                     Tanya Berger-Wolf},
  title           = {Variable-lag {Granger} Causality and Transfer Entropy
                     for Time Series Analysis},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {67:1--67:30},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441452},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441452},
  abstract        = {Granger causality is a fundamental technique for
                     causal inference in time series data, commonly used in
                     the social and biological sciences. Typical
                     operationalizations of Granger causality make a strong
                     assumption that every time point of the effect
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {67},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 68.
%%% The abstract excerpt ended in "However,. \ldots{}"; the stray period
%%% before the ellipsis is removed.
@Article{Xia:2021:ETD,
  author =       "Peike Xia and Wenjun Jiang and Jie Wu and Surong Xiao
                 and Guojun Wang",
  title =        "Exploiting Temporal Dynamics in Product Reviews for
                 Dynamic Sentiment Prediction at the Aspect Level",
  journal =      j-TKDD,
  volume =       "15",
  number =       "4",
  pages =        "68:1--68:29",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441451",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Jun 19 06:16:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441451",
  abstract =     "Online reviews and ratings play an important role in
                 shaping the purchase decisions of customers in
                 e-commerce. Many researches have been done to make
                 proper recommendations for users, by exploiting
                 reviews, ratings, user profiles, or behaviors.
                 However, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "68",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 15, number 4 (June 2021), article 69.
@Article{Kumar:2021:AGN,
  author          = {Suhansanu Kumar and Hari Sundaram},
  title           = {Attribute-Guided Network Sampling Mechanisms},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {69:1--69:24},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441445},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441445},
  abstract        = {This article introduces a novel task-independent
                     sampler for attributed networks. The problem is
                     important because while data mining tasks on network
                     content are common, sampling on internet-scale networks
                     is costly. Link-trace samplers such as Snowball
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {69},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 70.
@Article{Ghasemi:2021:UEE,
  author          = {Negin Ghasemi and Ramin Fatourechi and Saeedeh
                     Momtazi},
  title           = {User Embedding for Expert Finding in Community
                     Question Answering},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {70:1--70:16},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441302},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441302},
  abstract        = {The number of users who have the appropriate knowledge
                     to answer asked questions in community question
                     answering is lower than those who ask questions.
                     Therefore, finding expert users who can answer the
                     questions is very crucial and useful. In this
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {70},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% TKDD volume 15, number 4 (June 2021), article 71.
@Article{Yan:2021:SAB,
  author          = {Ruidong Yan and Yi Li and Deying Li and Yongcai Wang
                     and Yuqing Zhu and Weili Wu},
  title           = {A Stochastic Algorithm Based on Reverse Sampling
                     Technique to Fight Against the Cyberbullying},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {71:1--71:22},
  month           = jun,
  year            = {2021},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3441455},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3441455},
  abstract        = {Cyberbullying has caused serious consequences
                     especially for social network users in recent years.
                     However, the challenge is how to fight against the
                     cyberbullying effectively from the algorithmic
                     perspective. In this article, we study the fighting
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {71},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@article{Li:2021:ANE,
  author          = {Juan-Hui Li and Ling Huang and Chang-Dong Wang and
                     Dong Huang and Jian-Huang Lai and Pei Chen},
  title           = {Attributed Network Embedding with Micro-Meso
                     Structure},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {72:1--72:26},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3441486},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3441486},
  abstract        = {Recently, network embedding has received a large
                     amount of attention in network analysis. Although some
                     network embedding methods have been developed from
                     different perspectives, on one hand, most of the
                     existing methods only focus on leveraging the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {72},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Zhang:2021:CHI,
  author          = {Benhui Zhang and Maoguo Gong and Jianbin Huang and
                     Xiaoke Ma},
  title           = {Clustering Heterogeneous Information Network by Joint
                     Graph Embedding and Nonnegative Matrix Factorization},
  journal         = j-TKDD,
  volume          = {15},
  number          = {4},
  pages           = {73:1--73:25},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3441449},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Sat Jun 19 06:16:23 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3441449},
  abstract        = {Many complex systems derived from nature and society
                     consist of multiple types of entities and heterogeneous
                     interactions, which can be effectively modeled as
                     heterogeneous information network (HIN). Structural
                     analysis of heterogeneous networks is of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {73},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Yao:2021:SCI,
  author          = {Liuyi Yao and Zhixuan Chu and Sheng Li and Yaliang Li
                     and Jing Gao and Aidong Zhang},
  title           = {A Survey on Causal Inference},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {74:1--74:46},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3444944},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3444944},
  abstract        = {Causal inference is a critical research topic across
                     many domains, such as statistics, computer science,
                     education, public policy, and economics, for decades.
                     Nowadays, estimating causal effect from observational
                     data has become an appealing research \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {74},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Jurdi:2021:CNN,
  author          = {Wissam {Al Jurdi} and Jacques {Bou Abdo} and Jacques
                     Demerjian and Abdallah Makhoul},
  title           = {Critique on Natural Noise in Recommender Systems},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {75:1--75:30},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447780},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447780},
  abstract        = {Recommender systems have been upgraded, tested, and
                     applied in many, often incomparable ways. In attempts
                     to diligently understand user behavior in certain
                     environments, those systems have been frequently
                     utilized in domains like e-commerce, e-learning,
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {75},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Duong:2021:DGF,
  author =       "Quang-Huy Duong and Heri Ramampiaro and Kjetil
                 N{\o}rv{\aa}g and Thu-Lan Dam",
  title =        "Density Guarantee on Finding Multiple Subgraphs and
                 Subtensors",
  journal =      j-TKDD,
  volume =       "15",
  number =       "5",
  pages =        "76:1--76:32",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3446668",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 29 08:31:04 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3446668",
  abstract =     "Dense subregion (subgraph \& subtensor) detection is a
                 well-studied area, with a wide range of applications,
                 and numerous efficient approaches and algorithms have
                 been proposed. Approximation approaches are commonly
                 used for detecting dense subregions \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "76",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Burkhardt:2021:OAB,
  author          = {Paul Burkhardt},
  title           = {Optimal Algebraic Breadth-First Search for Sparse
                     Graphs},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {77:1--77:19},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3446216},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3446216},
  abstract        = {There has been a rise in the popularity of algebraic
                     methods for graph algorithms given the development of
                     the GraphBLAS library and other sparse matrix methods.
                     An exemplar for these approaches is Breadth-First
                     Search (BFS). The algebraic BFS algorithm \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {77},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Maurya:2021:GNN,
  author          = {Sunil Kumar Maurya and Xin Liu and Tsuyoshi Murata},
  title           = {Graph Neural Networks for Fast Node Ranking
                     Approximation},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {78:1--78:32},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3446217},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3446217},
  abstract        = {Graphs arise naturally in numerous situations,
                     including social graphs, transportation graphs, web
                     graphs, protein graphs, etc. One of the important
                     problems in these settings is to identify which nodes
                     are important in the graph and how they affect the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {78},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Stefani:2021:TSE,
  author =       "Lorenzo {De Stefani} and Erisa Terolli and Eli Upfal",
  title =        "Tiered Sampling: An Efficient Method for Counting
                 Sparse Motifs in Massive Graph Streams",
  journal =      j-TKDD,
  volume =       "15",
  number =       "5",
  pages =        "79:1--79:52",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3441299",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 29 08:31:04 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3441299",
  abstract =     "We introduce Tiered Sampling, a novel technique for
                 estimating the count of sparse motifs in massive graphs
                 whose edges are observed in a stream. Our technique
                 requires only a single pass on the data and uses a
                 memory of fixed size M, which can be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "79",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Bauer:2021:ICL,
  author          = {Josef Bauer and Dietmar Jannach},
  title           = {Improved Customer Lifetime Value Prediction With
                     Sequence-To-Sequence Learning and Feature-Based
                     Models},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {80:1--80:37},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3441444},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3441444},
  abstract        = {The prediction of the Customer Lifetime Value (CLV) is
                     an important asset for tool-supported marketing by
                     customer relationship managers. Since standard methods
                     based on purchase recency, frequency, and past profit
                     and revenue statistics often have \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {80},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Sanei-Mehri:2021:MLM,
  author          = {Seyed-Vahid Sanei-Mehri and Apurba Das and Hooman
                     Hashemi and Srikanta Tirthapura},
  title           = {Mining Largest Maximal Quasi-Cliques},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {81:1--81:21},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3446637},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3446637},
  abstract        = {Quasi-cliques are dense incomplete subgraphs of a
                     graph that generalize the notion of cliques.
                     Enumerating quasi-cliques from a graph is a robust way
                     to detect densely connected structures with
                     applications in bioinformatics and social network
                     analysis. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {81},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Gan:2021:UMA,
  author          = {Wensheng Gan and Jerry Chun-Wei Lin and Jiexiong Zhang
                     and Hongzhi Yin and Philippe Fournier-Viger and
                     Han-Chieh Chao and Philip S. Yu},
  title           = {Utility Mining Across Multi-Dimensional Sequences},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {82:1--82:24},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3446938},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3446938},
  abstract        = {Knowledge extraction from database is the fundamental
                     task in database and data mining community, which has
                     been applied to a wide range of real-world applications
                     and situations. Different from the support-based mining
                     models, the utility-oriented \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {82},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Hao:2021:DEI,
  author          = {Shaoyang Hao and Bin Guo and Hao Wang and Yunji Liang
                     and Lina Yao and Qianru Wang and Zhiwen Yu},
  title           = {{DeepDepict}: Enabling Information Rich, Personalized
                     Product Description Generation With the Deep Multiple
                     Pointer Generator Network},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {83:1--83:16},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3446982},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3446982},
  abstract        = {In e-commerce platforms, the online descriptive
                     information of products shows significant impacts on
                     the purchase behaviors. To attract potential buyers for
                     product promotion, numerous workers are employed to
                     write the impressive product descriptions. \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {83},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Guo:2021:AIM,
  author          = {Jianxiong Guo and Weili Wu},
  title           = {Adaptive Influence Maximization: If Influential Node
                     Unwilling to Be the Seed},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {84:1--84:23},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447396},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447396},
  abstract        = {Influence maximization problem attempts to find a
                     small subset of nodes that makes the expected influence
                     spread maximized, which has been researched intensively
                     before. They all assumed that each user in the seed set
                     we select is activated successfully \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {84},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Cheng:2021:DEB,
  author          = {Weiyu Cheng and Yanyan Shen and Linpeng Huang and
                     Yanmin Zhu},
  title           = {Dual-Embedding based Deep Latent Factor Models for
                     Recommendation},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {85:1--85:24},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447395},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447395},
  abstract        = {Among various recommendation methods, latent factor
                     models are usually considered to be state-of-the-art
                     techniques, which aim to learn user and item embeddings
                     for predicting user-item preferences. When applying
                     latent factor models to the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {85},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Sharma:2021:STL,
  author          = {Shalini Sharma and Angshul Majumdar},
  title           = {Sequential Transform Learning},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {86:1--86:18},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447394},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447394},
  abstract        = {This work proposes a new approach for dynamical
                     modeling; we call it sequential transform learning.
                     This is loosely based on the transform (analysis
                     dictionary) learning formulation. This is the first
                     work on this topic. Transform learning, was \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {86},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Liu:2021:SAS,
  author =       "Kai Liu and Hongbo Liu and Tomas E. Ward and Hua Wang
                 and Yu Yang and Bo Zhang and Xindong Wu",
  title =        "Self-Adaptive Skeleton Approaches to Detect
                 Self-Organized Coalitions From Brain Functional
                 Networks Through Probabilistic Mixture Models",
  journal =      j-TKDD,
  volume =       "15",
  number =       "5",
  pages =        "87:1--87:26",
  month =        jun,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3447570",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Jun 29 08:31:04 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3447570",
  abstract =     "Detecting self-organized coalitions from functional
                 networks is one of the most important ways to uncover
                 functional mechanisms in the brain. Determining these
                 raises well-known technical challenges in terms of
                 scale imbalance, outliers and hard-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "87",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Ling:2021:DGM,
  author          = {Xiang Ling and Lingfei Wu and Saizhuo Wang and Gaoning
                     Pan and Tengfei Ma and Fangli Xu and Alex X. Liu and
                     Chunming Wu and Shouling Ji},
  title           = {Deep Graph Matching and Searching for Semantic Code
                     Retrieval},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {88:1--88:21},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447571},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447571},
  abstract        = {Code retrieval is to find the code snippet from a
                     large corpus of source code repositories that highly
                     matches the query of natural language description.
                     Recent work mainly uses natural language processing
                     techniques to process both query texts (i.e.,
                     \ldots{})},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {88},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Peng:2021:SSE,
  author          = {Hao Peng and Jianxin Li and Yangqiu Song and Renyu
                     Yang and Rajiv Ranjan and Philip S. Yu and Lifang He},
  title           = {Streaming Social Event Detection and Evolution
                     Discovery in Heterogeneous Information Networks},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {89:1--89:33},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447585},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447585},
  abstract        = {Events are happening in real world and real time,
                     which can be planned and organized for occasions, such
                     as social gatherings, festival celebrations,
                     influential meetings, or sports activities. Social
                     media platforms generate a lot of real-time text
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {89},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Yue:2021:EBC,
  author          = {Lin Yue and Hao Shen and Sen Wang and Robert Boots and
                     Guodong Long and Weitong Chen and Xiaowei Zhao},
  title           = {Exploring {BCI} Control in Smart Environments:
                     Intention Recognition Via {EEG} Representation
                     Enhancement Learning},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {90:1--90:20},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3450449},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3450449},
  abstract        = {The brain-computer interface (BCI) control technology
                     that utilizes motor imagery to perform the desired
                     action instead of manual operation will be widely used
                     in smart environments. However, most of the research
                     lacks robust feature representation of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {90},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Liu:2021:ADK,
  author          = {Huawen Liu and Enhui Li and Xinwang Liu and Kaile Su
                     and Shichao Zhang},
  title           = {Anomaly Detection With Kernel Preserving Embedding},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {91:1--91:18},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447684},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447684},
  abstract        = {Similarity representation plays a central role in
                     increasingly popular anomaly detection techniques,
                     which have been successfully applied in various
                     realistic scenes. Until now, many low-rank
                     representation techniques have been introduced to
                     measure the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {91},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Liu:2021:MMG,
  author          = {Bo Liu and Xi He and Mingdong Song and Jiangqiang Li
                     and Guangzhi Qu and Jianlei Lang and Rentao Gu},
  title           = {A Method for Mining {Granger} Causality Relationship
                     on Atmospheric Visibility},
  journal         = j-TKDD,
  volume          = {15},
  number          = {5},
  pages           = {92:1--92:16},
  month           = jun,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447681},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Tue Jun 29 08:31:04 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447681},
  abstract        = {Atmospheric visibility is an indicator of atmospheric
                     transparency and its range directly reflects the
                     quality of the atmospheric environment. With the
                     acceleration of industrialization and urbanization, the
                     natural environment has suffered some \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {92},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Paul:2021:MOC,
  author          = {Dipanjyoti Paul and Rahul Kumar and Sriparna Saha and
                     Jimson Mathew},
  title           = {Multi-objective Cuckoo Search-based Streaming Feature
                     Selection for Multi-label Dataset},
  journal         = j-TKDD,
  volume          = {15},
  number          = {6},
  pages           = {93:1--93:24},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447586},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447586},
  abstract        = {The feature selection method is the process of
                     selecting only relevant features by removing irrelevant
                     or redundant features amongst the large number of
                     features that are used to represent data. Nowadays,
                     many application domains especially social media
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {93},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@Article{Coro:2021:LRS,
  author =       "Federico Cor{\'o} and Gianlorenzo D'Angelo and Yllka
                 Velaj",
  title =        "Link Recommendation for Social Influence
                 Maximization",
  journal =      j-TKDD,
  volume =       "15",
  number =       "6",
  pages =        "94:1--94:23",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3449023",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Jul 21 07:02:35 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3449023",
  abstract =     "Social link recommendation systems, like
                 ``People-you-may-know'' on Facebook, ``Who-to-follow''
                 on Twitter, and ``Suggested-Accounts'' on Instagram
                 assist the users of a social network in establishing
                 new connections with other users. While these systems
                 are \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "94",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Gao:2021:TPO,
  author          = {Xiaofeng Gao and Wenyi Xu and Mingding Liao and Guihai
                     Chen},
  title           = {Trust Prediction for Online Social Networks with
                     Integrated Time-Aware Similarity},
  journal         = j-TKDD,
  volume          = {15},
  number          = {6},
  pages           = {95:1--95:30},
  month           = jul,
  year            = {2021},
  coden           = {????},
  doi             = {https://doi.org/10.1145/3447682},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url             = {https://dl.acm.org/doi/10.1145/3447682},
  abstract        = {Online social networks gain increasing popularity in
                     recent years. In online social networks, trust
                     prediction is significant for recommendations of high
                     reputation users as well as in many other applications.
                     In the literature, trust prediction problem \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {95},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url     = {https://dl.acm.org/loi/tkdd},
}

@article{Bressan:2021:FMC,
  author          = {Marco Bressan and Stefano Leucci and Alessandro
                     Panconesi},
  title           = {Faster Motif Counting via Succinct Color Coding and
                     Adaptive Sampling},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {96},
  pages           = {96:1--96:27},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3447397},
  url             = {https://dl.acm.org/doi/10.1145/3447397},
  abstract        = {We address the problem of computing the distribution
                     of induced connected subgraphs, aka graphlets or
                     motifs, in large graphs. The current state-of-the-art
                     algorithms estimate the motif counts via uniform
                     sampling by leveraging the color coding technique
                     \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Zeng:2021:FRD,
  author          = {Shaoning Zeng and Bob Zhang and Jianping Gou and Yong
                     Xu and Wei Huang},
  title           = {Fast and Robust Dictionary-based Classification for
                     Image Data},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {97},
  pages           = {97:1--97:22},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3449360},
  url             = {https://dl.acm.org/doi/10.1145/3449360},
  abstract        = {Dictionary-based classification has been promising in
                     knowledge discovery from image data, due to its good
                     performance and interpretable theoretical system.
                     Dictionary learning effectively supports both small-
                     and large-scale datasets, while its \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Li:2021:SEN,
  author          = {Chenglin Li and Carrie Lu Tong and Di Niu and Bei
                     Jiang and Xiao Zuo and Lei Cheng and Jian Xiong and
                     Jianming Yang},
  title           = {Similarity Embedding Networks for Robust Human
                     Activity Recognition},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {98},
  pages           = {98:1--98:17},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3448021},
  url             = {https://dl.acm.org/doi/10.1145/3448021},
  abstract        = {Deep learning models for human activity recognition
                     (HAR) based on sensor data have been heavily studied
                     recently. However, the generalization ability of deep
                     models on complex real-world HAR data is limited by the
                     availability of high-quality labeled \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Koley:2021:DEE,
  author =       "Paramita Koley and Avirup Saha and Sourangshu
                 Bhattacharya and Niloy Ganguly and Abir De",
  title =        "Demarcating Endogenous and Exogenous Opinion Dynamics:
                 an Experimental Design Approach",
  journal =      j-TKDD,
  volume =       "15",
  number =       "6",
  pages =        "99:1--99:25",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3449361",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Jul 21 07:02:35 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3449361",
  abstract =     "The networked opinion diffusion in online social
                 networks is often governed by the two genres of
                 opinions---endogenous opinions that are driven by the
                 influence of social contacts among users, and exogenous
                 opinions which are formed by external effects like
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "99",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Joaristi:2021:SGF,
  author          = {Mikel Joaristi and Edoardo Serra},
  title           = {{SIR-GN}: a Fast Structural Iterative Representation
                     Learning Approach For Graph Nodes},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {100},
  pages           = {100:1--100:39},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3450315},
  url             = {https://dl.acm.org/doi/10.1145/3450315},
  abstract        = {Graph representation learning methods have attracted
                     an increasing amount of attention in recent years.
                     These methods focus on learning a numerical
                     representation of the nodes in a graph. Learning these
                     representations is a powerful instrument for tasks
                     \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Wu:2021:LGN,
  author          = {Man Wu and Shirui Pan and Lan Du and Xingquan Zhu},
  title           = {Learning Graph Neural Networks with Positive and
                     Unlabeled Nodes},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {101},
  pages           = {101:1--101:25},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3450316},
  url             = {https://dl.acm.org/doi/10.1145/3450316},
  abstract        = {Graph neural networks (GNNs) are important tools for
                     transductive learning tasks, such as node
                     classification in graphs, due to their expressive power
                     in capturing complex interdependency between nodes. To
                     enable GNN learning, existing works typically
                     \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Li:2021:NNS,
  author          = {Dongsheng Li and Haodong Liu and Chao Chen and
                     Yingying Zhao and Stephen M. Chu and Bo Yang},
  title           = {{NeuSE}: a Neural Snapshot Ensemble Method for
                     Collaborative Filtering},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {102},
  pages           = {102:1--102:20},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3450526},
  url             = {https://dl.acm.org/doi/10.1145/3450526},
  abstract        = {In collaborative filtering (CF) algorithms, the
                     optimal models are usually learned by globally
                     minimizing the empirical risks averaged over all the
                     observed data. However, the global models are often
                     obtained via a performance tradeoff among users/items,
                     \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Deng:2021:PUT,
  author          = {Jinliang Deng and Xiusi Chen and Zipei Fan and Renhe
                     Jiang and Xuan Song and Ivor W. Tsang},
  title           = {The Pulse of Urban Transport: Exploring the
                     Co-evolving Pattern for Spatio-temporal Forecasting},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {103},
  pages           = {103:1--103:25},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3450528},
  url             = {https://dl.acm.org/doi/10.1145/3450528},
  abstract        = {Transportation demand forecasting is a topic of large
                     practical value. However, the model that fits the
                     demand of one transportation by only considering the
                     historical data of its own could be vulnerable since
                     random fluctuations could easily impact the \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Wang:2021:HCD,
  author          = {Yashen Wang and Huanhuan Zhang and Zhirun Liu and
                     Qiang Zhou},
  title           = {Hierarchical Concept-Driven Language Model},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {104},
  pages           = {104:1--104:22},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451167},
  url             = {https://dl.acm.org/doi/10.1145/3451167},
  abstract        = {For guiding natural language generation, many
                     semantic-driven methods have been proposed. While
                     clearly improving the performance of the end-to-end
                     training task, these existing semantic-driven methods
                     still have clear limitations: for example, (i) they
                     \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% NOTE(review): the final page number below (105) equals the article
%%% number, which looks like a publisher-metadata slip rather than a real
%%% page count --- TODO confirm against the published PDF.
@article{Alarte:2021:PLM,
  author          = {Juli{\'a}n Alarte and Josep Silva},
  title           = {Page-Level Main Content Extraction From Heterogeneous
                     {Webpages}},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {105},
  pages           = {105:1--105:105},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451168},
  url             = {https://dl.acm.org/doi/10.1145/3451168},
  abstract        = {The main content of a webpage is often surrounded by
                     other boilerplate elements related to the template,
                     such as menus, advertisements, copyright notices, and
                     comments. For crawlers and indexers, isolating the main
                     content from the template and other \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Nettasinghe:2021:MLE,
  author          = {Buddhika Nettasinghe and Vikram Krishnamurthy},
  title           = {Maximum Likelihood Estimation of Power-law Degree
                     Distributions via Friendship Paradox-based Sampling},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {106},
  pages           = {106:1--106:28},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451166},
  url             = {https://dl.acm.org/doi/10.1145/3451166},
  abstract        = {This article considers the problem of estimating a
                     power-law degree distribution of an undirected network
                     using sampled data. Although power-law degree
                     distributions are ubiquitous in nature, the widely used
                     parametric methods for estimating them (e.g.,
                     \ldots{})},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Anaissi:2021:OTB,
  author          = {Ali Anaissi and Basem Suleiman and Seid Miad Zandavi},
  title           = {Online Tensor-Based Learning Model for Structural
                     Damage Detection},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {107},
  pages           = {107:1--107:18},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451217},
  url             = {https://dl.acm.org/doi/10.1145/3451217},
  abstract        = {The online analysis of multi-way data stored in a
                     tensor has become an essential tool for capturing the
                     underlying structures and extracting the sensitive
                     features that can be used to learn a predictive model.
                     However, data distributions often evolve \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Wang:2021:MIM,
  author =       "Rui Wang and Yongkun Li and Shuai Lin and Hong Xie and
                 Yinlong Xu and John C. S. Lui",
  title =        "On Modeling Influence Maximization in Social Activity
                 Networks under General Settings",
  journal =      j-TKDD,
  volume =       "15",
  number =       "6",
  pages =        "108:1--108:28",
  month =        jul,
  year =         "2021",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3451218",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Jul 21 07:02:35 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3451218",
  abstract =     "Finding the set of most influential users in online
                 social networks (OSNs) to trigger the largest influence
                 cascade is meaningful, e.g., companies may leverage the
                 ``word-of-mouth'' effect to trigger a large cascade of
                 purchases by offering free samples/\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "108",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Chen:2021:ICD,
  author          = {Zhe Chen and Aixin Sun and Xiaokui Xiao},
  title           = {Incremental Community Detection on Large Complex
                     Attributed Network},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {109},
  pages           = {109:1--109:20},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451216},
  url             = {https://dl.acm.org/doi/10.1145/3451216},
  abstract        = {Community detection on network data is a fundamental
                     task, and has many applications in industry. Network
                     data in industry can be very large, with incomplete and
                     complex attributes, and more importantly, growing. This
                     calls for a community detection \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Xia:2021:GDD,
  author          = {Tong Xia and Junjie Lin and Yong Li and Jie Feng and
                     Pan Hui and Funing Sun and Diansheng Guo and Depeng
                     Jin},
  title           = {{$3$DGCN}: {$3$-Dimensional} Dynamic Graph
                     Convolutional Network for Citywide Crowd Flow
                     Prediction},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {110},
  pages           = {110:1--110:21},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451394},
  url             = {https://dl.acm.org/doi/10.1145/3451394},
  abstract        = {Crowd flow prediction is an essential task benefiting
                     a wide range of applications for the transportation
                     system and public safety. However, it is a challenging
                     problem due to the complex spatio-temporal dependence
                     and the complicated impact of urban \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Liu:2021:FBN,
  author          = {Kai Liu and Xiangyu Li and Zhihui Zhu and Lodewijk
                     Brand and Hua Wang},
  title           = {Factor-Bounded Nonnegative Matrix Factorization},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {111},
  pages           = {111:1--111:18},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451395},
  url             = {https://dl.acm.org/doi/10.1145/3451395},
  abstract        = {Nonnegative Matrix Factorization (NMF) is broadly used
                     to determine class membership in a variety of
                     clustering applications. From movie recommendations and
                     image clustering to visual feature extractions, NMF has
                     applications to solve a large number of \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Wang:2021:ACA,
  author          = {Huandong Wang and Yong Li and Mu Du and Zhenhui Li and
                     Depeng Jin},
  title           = {{App2Vec}: Context-Aware Application Usage
                     Prediction},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {15},
  number          = {6},
  articleno       = {112},
  pages           = {112:1--112:21},
  month           = jul,
  year            = {2021},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451396},
  url             = {https://dl.acm.org/doi/10.1145/3451396},
  abstract        = {Both app developers and service providers have strong
                     motivations to understand when and where certain apps
                     are used by users. However, it has been a challenging
                     problem due to the highly skewed and noisy app usage
                     data. Moreover, apps are regarded as \ldots{}},
  bibdate         = {Wed Jul 21 07:02:35 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Liu:2022:DLV,
  author          = {Fenglin Liu and Xian Wu and Shen Ge and Xuancheng Ren
                     and Wei Fan and Xu Sun and Yuexian Zou},
  title           = {{DiMBERT}: Learning Vision-Language Grounded
                     Representations with Disentangled
                     Multimodal-Attention},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {16},
  number          = {1},
  articleno       = {1},
  pages           = {1:1--1:19},
  month           = feb,
  year            = {2022},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3447685},
  url             = {https://dl.acm.org/doi/10.1145/3447685},
  abstract        = {Vision-and-language (V-L) tasks require the system to
                     understand both vision content and natural language,
                     thus learning fine-grained joint representations of
                     vision and language (a.k.a. V-L representations) is of
                     paramount importance. Recently, various \ldots{}},
  bibdate         = {Tue Sep 14 07:09:39 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Gao:2022:CDR,
  author          = {Chen Gao and Yong Li and Fuli Feng and Xiangning Chen
                     and Kai Zhao and Xiangnan He and Depeng Jin},
  title           = {Cross-domain Recommendation with Bridge-Item
                     Embeddings},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {16},
  number          = {1},
  articleno       = {2},
  pages           = {2:1--2:23},
  month           = feb,
  year            = {2022},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3447683},
  url             = {https://dl.acm.org/doi/10.1145/3447683},
  abstract        = {Web systems that provide the same functionality
                     usually share a certain amount of items. This makes it
                     possible to combine data from different websites to
                     improve recommendation quality, known as the
                     cross-domain recommendation task. Despite many research
                     \ldots{}},
  bibdate         = {Tue Sep 14 07:09:39 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Lin:2022:LVA,
  author          = {Luyue Lin and Xin Zheng and Bo Liu and Wei Chen and
                     Yanshan Xiao},
  title           = {A Latent Variable Augmentation Method for Image
                     Categorization with Insufficient Training Samples},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {16},
  number          = {1},
  articleno       = {3},
  pages           = {3:1--3:35},
  month           = feb,
  year            = {2022},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451165},
  url             = {https://dl.acm.org/doi/10.1145/3451165},
  abstract        = {Over the past few years, we have made great progress
                     in image categorization based on convolutional neural
                     networks (CNNs). These CNNs are always trained based on
                     a large-scale image data set; however, people may only
                     have limited training samples for \ldots{}},
  bibdate         = {Tue Sep 14 07:09:39 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Gao:2022:GBS,
  author          = {Jianliang Gao and Xiaoting Ying and Cong Xu and
                     Jianxin Wang and Shichao Zhang and Zhao Li},
  title           = {Graph-Based Stock Recommendation by Time-Aware
                     Relational Attention Network},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {16},
  number          = {1},
  articleno       = {4},
  pages           = {4:1--4:21},
  month           = feb,
  year            = {2022},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451397},
  url             = {https://dl.acm.org/doi/10.1145/3451397},
  abstract        = {The stock market investors aim at maximizing their
                     investment returns. Stock recommendation task is to
                     recommend stocks with higher return ratios for the
                     investors. Most stock prediction methods study the
                     historical sequence patterns to predict stock
                     \ldots{}},
  bibdate         = {Tue Sep 14 07:09:39 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Lin:2022:MML,
  author          = {Yaojin Lin and Qinghua Hu and Jinghua Liu and Xingquan
                     Zhu and Xindong Wu},
  title           = {{MULFE}: Multi-Label Learning via Label-Specific
                     Feature Space Ensemble},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {16},
  number          = {1},
  articleno       = {5},
  pages           = {5:1--5:24},
  month           = feb,
  year            = {2022},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451392},
  url             = {https://dl.acm.org/doi/10.1145/3451392},
  abstract        = {In multi-label learning, label correlations commonly
                     exist in the data. Such correlation not only provides
                     useful information, but also imposes significant
                     challenges for multi-label learning. Recently,
                     label-specific feature embedding has been proposed
                     \ldots{}},
  bibdate         = {Tue Sep 14 07:09:39 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Lin:2022:JPF,
  author          = {Fandel Lin and Hsun-Ping Hsieh},
  title           = {A Joint Passenger Flow Inference and Path Recommender
                     System for Deploying New Routes and Stations of Mass
                     Transit Transportation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {16},
  number          = {1},
  articleno       = {6},
  pages           = {6:1--6:36},
  month           = feb,
  year            = {2022},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451393},
  url             = {https://dl.acm.org/doi/10.1145/3451393},
  abstract        = {In this work, a novel decision assistant system for
                     urban transportation, called Route Scheme Assistant
                     (RSA), is proposed to address two crucial issues that
                     few former researches have focused on: route-based
                     passenger flow (PF) inference and multivariant
                     \ldots{}},
  bibdate         = {Tue Sep 14 07:09:39 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Liu:2022:BAM,
  author          = {Huafeng Liu and Liping Jing and Jingxuan Wen and
                     Pengyu Xu and Jian Yu and Michael K. Ng},
  title           = {{Bayesian} Additive Matrix Approximation for Social
                     Recommendation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  volume          = {16},
  number          = {1},
  articleno       = {7},
  pages           = {7:1--7:34},
  month           = feb,
  year            = {2022},
  coden           = {????},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  doi             = {https://doi.org/10.1145/3451391},
  url             = {https://dl.acm.org/doi/10.1145/3451391},
  abstract        = {Social relations between users have been proven to be
                     a good type of auxiliary information to improve the
                     recommendation performance. However, it is a
                     challenging issue to sufficiently exploit the social
                     relations and correctly determine the user \ldots{}},
  bibdate         = {Tue Sep 14 07:09:39 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Guo:2022:RCT,
  author =       {Jinjin Guo and Longbing Cao and Zhiguo Gong},
  title =        {Recurrent Coupled Topic Modeling over Sequential
                 Documents},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {8:1--8:32},
  articleno =    {8},
  DOI =          {https://doi.org/10.1145/3451530},
  URL =          {https://dl.acm.org/doi/10.1145/3451530},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {The abundant sequential documents such as online
                 archival, social media, and news feeds are streamingly
                 updated, where each chunk of documents is incorporated
                 with smoothly evolving yet dependent topics. Such
                 digital texts have attracted extensive \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Guo:2022:DLD,
  author =       {Yunyan Guo and Jianzhong Li},
  title =        {Distributed Latent {Dirichlet} Allocation on Streams},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {9:1--9:20},
  articleno =    {9},
  DOI =          {https://doi.org/10.1145/3451528},
  URL =          {https://dl.acm.org/doi/10.1145/3451528},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Latent Dirichlet Allocation (LDA) has been widely used
                 for topic modeling, with applications spanning various
                 areas such as natural language processing and
                 information retrieval. While LDA on small and static
                 datasets has been extensively studied, several
                 \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Han:2022:EAI,
  author =       {Juhee Han and Younghoon Lee},
  title =        {Explainable Artificial Intelligence-Based Competitive
                 Factor Identification},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {10:1--10:11},
  articleno =    {10},
  DOI =          {https://doi.org/10.1145/3451529},
  URL =          {https://dl.acm.org/doi/10.1145/3451529},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Competitor analysis is an essential component of
                 corporate strategy, providing both offensive and
                 defensive strategic contexts to identify opportunities
                 and threats. The rapid development of social media has
                 recently led to several methodologies and \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Laishram:2022:MEA,
  author =       {Ricky Laishram and Jeremy D. Wendt and Sucheta
                 Soundarajan},
  title =        {{MCS+}: an Efficient Algorithm for Crawling the
                 Community Structure in Multiplex Networks},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {11:1--11:32},
  articleno =    {11},
  DOI =          {https://doi.org/10.1145/3451527},
  URL =          {https://dl.acm.org/doi/10.1145/3451527},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {In this article, we consider the problem of crawling a
                 multiplex network to identify the community structure
                 of a layer-of-interest. A multiplex network is one
                 where there are multiple types of relationships between
                 the nodes. In many multiplex networks, \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Wang:2022:GML,
  author =       "Lichen Wang and Zhengming Ding and Yun Fu",
  title =        "Generic Multi-label Annotation via Adaptive Graph and
                 Marginalized Augmentation",
  journal =      j-TKDD,
  volume =       "16",
  number =       "1",
  pages =        "12:1--12:20",
  month =        feb,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3451884",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Sep 14 07:09:39 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3451884",
  abstract =     "Multi-label learning recovers multiple labels from a
                 single instance. It is a more challenging task compared
                 with single-label manner. Most multi-label learning
                 approaches need large-scale well-labeled samples to
                 achieve high accurate performance. However, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Moreo:2022:LTT,
  author =       {Alejandro Moreo and Andrea Esuli and Fabrizio
                 Sebastiani},
  title =        {Lost in Transduction: Transductive Transfer Learning
                 in Text Classification},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {13:1--13:21},
  articleno =    {13},
  DOI =          {https://doi.org/10.1145/3453146},
  URL =          {https://dl.acm.org/doi/10.1145/3453146},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Obtaining high-quality labelled data for training a
                 classifier in a new application domain is often costly.
                 Transfer Learning (a.k.a. ``Inductive Transfer'') tries
                 to alleviate these costs by transferring, to the
                 ``target'' domain of interest, knowledge \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Li:2022:MTP,
  author =       {Yangfan Li and Kenli Li and Cen Chen and Xu Zhou and
                 Zeng Zeng and Keqin Li},
  title =        {Modeling Temporal Patterns with Dilated Convolutions
                 for Time-Series Forecasting},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {14:1--14:22},
  articleno =    {14},
  DOI =          {https://doi.org/10.1145/3453724},
  URL =          {https://dl.acm.org/doi/10.1145/3453724},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Time-series forecasting is an important problem across
                 a wide range of domains. Designing accurate and prompt
                 forecasting algorithms is a non-trivial task, as
                 temporal data that arise in real applications often
                 involve both non-linear dynamics and linear \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Yang:2022:CCP,
  author =       {Keyu Yang and Yunjun Gao and Lei Liang and Song Bian
                 and Lu Chen and Baihua Zheng},
  title =        {{CrowdTC}: Crowd-powered Learning for Text
                 Classification},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {15:1--15:23},
  articleno =    {15},
  DOI =          {https://doi.org/10.1145/3457216},
  URL =          {https://dl.acm.org/doi/10.1145/3457216},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Text classification is a fundamental task in content
                 analysis. Nowadays, deep learning has demonstrated
                 promising performance in text classification compared
                 with shallow models. However, almost all the existing
                 models do not take advantage of the wisdom \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Liu:2022:JMH,
  author =       {Haobing Liu and Yanmin Zhu and Tianzi Zang and Yanan
                 Xu and Jiadi Yu and Feilong Tang},
  title =        {Jointly Modeling Heterogeneous Student Behaviors and
                 Interactions among Multiple Prediction Tasks},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {16:1--16:24},
  articleno =    {16},
  DOI =          {https://doi.org/10.1145/3458023},
  URL =          {https://dl.acm.org/doi/10.1145/3458023},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Prediction tasks about students have practical
                 significance for both student and college. Making
                 multiple predictions about students is an important
                 part of a smart campus. For instance, predicting
                 whether a student will fail to graduate can alert the
                 \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Lee:2022:MMS,
  author =       {Wu Lee and Yuliang Shi and Hongfeng Sun and Lin Cheng
                 and Kun Zhang and Xinjun Wang and Zhiyong Chen},
  title =        {{MSIPA}: Multi-Scale Interval Pattern-Aware Network
                 for {ICU} Transfer Prediction},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {17:1--17:17},
  articleno =    {17},
  DOI =          {https://doi.org/10.1145/3458284},
  URL =          {https://dl.acm.org/doi/10.1145/3458284},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Accurate prediction of patients' ICU transfer events
                 is of great significance for improving ICU treatment
                 efficiency. ICU transition prediction task based on
                 Electronic Health Records (EHR) is a temporal mining
                 task like many other health informatics \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Zhang:2022:BSF,
  author =       {Min-Ling Zhang and Jun-Peng Fang and Yi-Bo Wang},
  title =        {{BiLabel}-Specific Features for Multi-Label
                 Classification},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {18:1--18:23},
  articleno =    {18},
  DOI =          {https://doi.org/10.1145/3458283},
  URL =          {https://dl.acm.org/doi/10.1145/3458283},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {In multi-label classification, the task is to induce
                 predictive models which can assign a set of relevant
                 labels for the unseen instance. The strategy of
                 label-specific features has been widely employed in
                 learning from multi-label examples, where the
                 \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Liu:2022:NMV,
  author =       {Bo Liu and Haowen Zhong and Yanshan Xiao},
  title =        {New Multi-View Classification Method with Uncertain
                 Data},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {19:1--19:23},
  articleno =    {19},
  DOI =          {https://doi.org/10.1145/3458282},
  URL =          {https://dl.acm.org/doi/10.1145/3458282},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Multi-view classification aims at designing a
                 multi-view learning strategy to train a classifier from
                 multi-view data, which are easily collected in
                 practice. Most of the existing works focus on
                 multi-view classification by assuming the multi-view
                 data \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Na:2022:USE,
  author =       {Gyoung S. Na and Hyunju Chang},
  title =        {Unsupervised Subspace Extraction via Deep Kernelized
                 Clustering},
  journal =      j-TKDD,
  year =         {2022},
  month =        feb,
  volume =       {16},
  number =       {1},
  pages =        {20:1--20:15},
  articleno =    {20},
  DOI =          {https://doi.org/10.1145/3459082},
  URL =          {https://dl.acm.org/doi/10.1145/3459082},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Feature extraction has been widely studied to find
                 informative latent features and reduce the
                 dimensionality of data. In particular, due to the
                 difficulty in obtaining labeled data, unsupervised
                 feature extraction has received much attention in data
                 \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:39 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Aggarwal:2022:CEC,
  author =       {Charu C. Aggarwal},
  title =        {Communication from the {Editor-in-Chief}: State of the
                 {{\booktitle{ACM Transactions on Knowledge Discovery
                 from Data}}}},
  journal =      j-TKDD,
  year =         {2022},
  month =        apr,
  volume =       {16},
  number =       {2},
  pages =        {21e:1--21e:2},
  articleno =    {21e},
  DOI =          {https://doi.org/10.1145/3463950},
  URL =          {https://dl.acm.org/doi/10.1145/3463950},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Zhang:2022:SUM,
  author =       {Chunkai Zhang and Zilin Du and Yuting Yang and
                 Wensheng Gan and Philip S. Yu},
  title =        {On-Shelf Utility Mining of Sequence Data},
  journal =      j-TKDD,
  year =         {2022},
  month =        apr,
  volume =       {16},
  number =       {2},
  pages =        {21:1--21:31},
  articleno =    {21},
  DOI =          {https://doi.org/10.1145/3457570},
  URL =          {https://dl.acm.org/doi/10.1145/3457570},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Utility mining has emerged as an important and
                 interesting topic owing to its wide application and
                 considerable popularity. However, conventional utility
                 mining methods have a bias toward items that have
                 longer on-shelf time as they have a greater chance
                 \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Tran:2022:CDP,
  author =       {Cong Tran and Won-Yong Shin and Andreas Spitz},
  title =        {Community Detection in Partially Observable Social
                 Networks},
  journal =      j-TKDD,
  year =         {2022},
  month =        apr,
  volume =       {16},
  number =       {2},
  pages =        {22:1--22:24},
  articleno =    {22},
  DOI =          {https://doi.org/10.1145/3461339},
  URL =          {https://dl.acm.org/doi/10.1145/3461339},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {The discovery of community structures in social
                 networks has gained significant attention since it is a
                 fundamental problem in understanding the networks'
                 topology and functions. However, most social network
                 data are collected from partially observable \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Li:2022:CDL,
  author =       {Zhao Li and Junshuai Song and Zehong Hu and Zhen Wang
                 and Jun Gao},
  title =        {Constrained Dual-Level Bandit for Personalized
                 Impression Regulation in Online Ranking Systems},
  journal =      j-TKDD,
  year =         {2022},
  month =        apr,
  volume =       {16},
  number =       {2},
  pages =        {23:1--23:23},
  articleno =    {23},
  DOI =          {https://doi.org/10.1145/3461340},
  URL =          {https://dl.acm.org/doi/10.1145/3461340},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Impression regulation plays an important role in
                 various online ranking systems, e.g., e-commerce
                 ranking systems always need to achieve local commercial
                 demands on some pre-labeled target items like fresh
                 item cultivation and fraudulent item \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Ohare:2022:HVT,
  author =       "Kevin O'Hare and Anna Jurek-Loughrey and Cassio {De
                 Campos}",
  title =        "High-Value Token-Blocking: Efficient Blocking Method
                 for Record Linkage",
  journal =      j-TKDD,
  volume =       "16",
  number =       "2",
  pages =        "24:1--24:17",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3450527",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Sep 14 07:09:40 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3450527",
  abstract =     "Data integration is an important component of Big Data
                 analytics. One of the key challenges in data
                 integration is record linkage, that is, matching
                 records that represent the same real-world entity.
                 Because of computational costs, methods referred to as
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "24",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ding:2022:ESU,
  author =       {Ming Ding and Tianyu Wang and Xudong Wang},
  title =        {Establishing Smartphone User Behavior Model Based on
                 Energy Consumption Data},
  journal =      j-TKDD,
  year =         {2022},
  month =        apr,
  volume =       {16},
  number =       {2},
  pages =        {25:1--25:40},
  articleno =    {25},
  DOI =          {https://doi.org/10.1145/3461459},
  URL =          {https://dl.acm.org/doi/10.1145/3461459},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {In smartphone data analysis, both energy consumption
                 modeling and user behavior mining have been explored
                 extensively, but the relationship between energy
                 consumption and user behavior has been rarely studied.
                 Such a relationship is explored over large-\ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Saude:2022:RRB,
  author =       {Jo{\~a}o Sa{\'u}de and Guilherme Ramos and Ludovico
                 Boratto and Carlos Caleiro},
  title =        {A Robust Reputation-Based Group Ranking System and Its
                 Resistance to Bribery},
  journal =      j-TKDD,
  year =         {2022},
  month =        apr,
  volume =       {16},
  number =       {2},
  pages =        {26:1--26:35},
  articleno =    {26},
  DOI =          {https://doi.org/10.1145/3462210},
  URL =          {https://dl.acm.org/doi/10.1145/3462210},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {The spread of online reviews and opinions and its
                 growing influence on people's behavior and decisions
                 boosted the interest to extract meaningful information
                 from this data deluge. Hence, crowdsourced ratings of
                 products and services gained a critical \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Wu:2022:EHG,
  author =       {Hanlu Wu and Tengfei Ma and Lingfei Wu and Fangli Xu
                 and Shouling Ji},
  title =        {Exploiting Heterogeneous Graph Neural Networks with
                 Latent Worker\slash Task Correlation Information for
                 Label Aggregation in Crowdsourcing},
  journal =      j-TKDD,
  year =         {2022},
  month =        apr,
  volume =       {16},
  number =       {2},
  pages =        {27:1--27:18},
  articleno =    {27},
  DOI =          {https://doi.org/10.1145/3460865},
  URL =          {https://dl.acm.org/doi/10.1145/3460865},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Crowdsourcing has attracted much attention for its
                 convenience to collect labels from non-expert workers
                 instead of experts. However, due to the high level of
                 noise from the non-experts, a label aggregation model
                 that infers the true label from noisy \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Li:2022:MNV,
  author =       "Hui-Jia Li and Lin Wang and Zhan Bu and Jie Cao and
                 Yong Shi",
  title =        "Measuring the Network Vulnerability Based on {Markov}
                 Criticality",
  journal =      j-TKDD,
  volume =       "16",
  number =       "2",
  pages =        "28:1--28:24",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3464390",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Sep 14 07:09:40 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3464390",
  abstract =     "Vulnerability assessment---a critical issue for
                 networks---attempts to foresee unexpected destructive
                 events or hostile attacks in the whole system. In this
                 article, we consider a new Markov global connectivity
                 metric---Kemeny constant, and take its derivative
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2022:SBA,
  author =       {Guangtao Wang and Gao Cong and Ying Zhang and Zhen Hai
                 and Jieping Ye},
  title =        {A Synopsis Based Approach for Itemset Frequency
                 Estimation over Massive Multi-Transaction Stream},
  journal =      j-TKDD,
  year =         {2022},
  month =        apr,
  volume =       {16},
  number =       {2},
  pages =        {29:1--29:30},
  articleno =    {29},
  DOI =          {https://doi.org/10.1145/3465238},
  URL =          {https://dl.acm.org/doi/10.1145/3465238},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {The streams where multiple transactions are associated
                 with the same key are prevalent in practice, e.g., a
                 customer has multiple shopping records arriving at
                 different time. Itemset frequency estimation on such
                 streams is very challenging since sampling \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Yin:2022:WFM,
  author =       {Jianfei Yin and Ruili Wang and Yeqing Guo and Yizhe
                 Bai and Shunda Ju and Weili Liu and Joshua Zhexue
                 Huang},
  title =        {Wealth Flow Model: Online Portfolio Selection Based on
                 Learning Wealth Flow Matrices},
  journal =      j-TKDD,
  year =         {2022},
  month =        apr,
  volume =       {16},
  number =       {2},
  pages =        {30:1--30:27},
  articleno =    {30},
  DOI =          {https://doi.org/10.1145/3464308},
  URL =          {https://dl.acm.org/doi/10.1145/3464308},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {This article proposes a deep learning solution to the
                 online portfolio selection problem based on learning a
                 latent structure directly from a price time series. It
                 introduces a novel wealth flow matrix for representing
                 a latent structure that has special \ldots{}},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  acknowledgement = ack-nhfb,
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
}

@Article{Hidalgo:2022:DAD,
  author =       {Juan I. G. Hidalgo and Silas G. T. C. Santos and
                 Roberto S. M. Barros},
  title =        {Dynamically Adjusting Diversity in Ensembles for the
                 Classification of Data Streams with Concept Drift},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {2},
  pages =        {31:1--31:20},
  articleno =    {31},
  month =        apr,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3466616},
  URL =          {https://dl.acm.org/doi/10.1145/3466616},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {A data stream can be defined as a system that
                 continually generates a lot of data over time. Today,
                 processing data streams requires new demands and
                 challenging tasks in the data mining and machine
                 learning areas. Concept Drift is a problem commonly
                 \ldots{}},
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Cantini:2022:LSH,
  author =       {Riccardo Cantini and Fabrizio Marozzo and Giovanni
                 Bruno and Paolo Trunfio},
  title =        {Learning Sentence-to-Hashtags Semantic Mapping for
                 Hashtag Recommendation on Microblogs},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {2},
  pages =        {32:1--32:26},
  articleno =    {32},
  month =        apr,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3466876},
  URL =          {https://dl.acm.org/doi/10.1145/3466876},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {The growing use of microblogging platforms is
                 generating a huge amount of posts that need effective
                 methods to be classified and searched. In Twitter and
                 other social media platforms, hashtags are exploited by
                 users to facilitate the search, \ldots{}},
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Abebe:2022:ODO,
  author =       "Rediet Abebe and T.-H. Hubert Chan and Jon Kleinberg
                 and Zhibin Liang and David Parkes and Mauro Sozio and
                 Charalampos E. Tsourakakis",
  title =        "Opinion Dynamics Optimization by Varying
                 Susceptibility to Persuasion via Non-Convex Local
                 Search",
  journal =      j-TKDD,
  volume =       "16",
  number =       "2",
  pages =        "33:1--33:34",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3466617",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Sep 14 07:09:40 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3466617",
  abstract =     "A long line of work in social psychology has studied
                 variations in people's susceptibility to persuasion---the
                 extent to which they are willing to modify their
                 opinions on a topic. This body of literature suggests
                 an interesting perspective on theoretical \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "33",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2022:SHS,
  author =       {Yang Yang and Hongchen Wei and Zhen-Qiang Sun and
                 Guang-Yu Li and Yuanchun Zhou and Hui Xiong and Jian
                 Yang},
  title =        {{S2OSC}: a Holistic Semi-Supervised Approach for Open
                 Set Classification},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {2},
  pages =        {34:1--34:27},
  articleno =    {34},
  month =        apr,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3468675},
  URL =          {https://dl.acm.org/doi/10.1145/3468675},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Open set classification (OSC) tackles the problem of
                 determining whether the data are in-class or
                 out-of-class during inference, when only provided with
                 a set of in-class examples at training time.
                 Traditional OSC methods usually train discriminative or
                 \ldots{}},
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhang:2022:EHI,
  author =       {Yiding Zhang and Xiao Wang and Nian Liu and Chuan
                 Shi},
  title =        {Embedding Heterogeneous Information Network in
                 Hyperbolic Spaces},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {2},
  pages =        {35:1--35:23},
  articleno =    {35},
  month =        apr,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3468674},
  URL =          {https://dl.acm.org/doi/10.1145/3468674},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Heterogeneous information network (HIN) embedding,
                 aiming to project HIN into a low-dimensional space, has
                 attracted considerable research attention. Most of the
                 existing HIN embedding methods focus on preserving the
                 inherent network structure and \ldots{}},
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Wang:2022:CCN,
  author =       {Xueyuan Wang and Hongpo Zhang and Zongmin Wang and
                 Yaqiong Qiao and Jiangtao Ma and Honghua Dai},
  title =        {{Con\&Net}: a Cross-Network Anchor Link Discovery
                 Method Based on Embedding Representation},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {2},
  pages =        {36:1--36:18},
  articleno =    {36},
  month =        apr,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3469083},
  URL =          {https://dl.acm.org/doi/10.1145/3469083},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Cross-network anchor link discovery is an important
                 research problem and has many applications in
                 heterogeneous social network. Existing schemes of
                 cross-network anchor link discovery can provide
                 reasonable link discovery results, but the quality of
                 \ldots{}},
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhang:2022:HVA,
  author =       {Hangbin Zhang and Raymond K. Wong and Victor W. Chu},
  title =        {Hybrid Variational Autoencoder for Recommender
                 Systems},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {2},
  pages =        {37:1--37:37},
  articleno =    {37},
  month =        apr,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3470659},
  URL =          {https://dl.acm.org/doi/10.1145/3470659},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {E-commerce platforms heavily rely on automatic
                 personalized recommender systems, e.g., collaborative
                 filtering models, to improve customer experience. Some
                 hybrid models have been proposed recently to address
                 the deficiency of existing models. However, \ldots{}},
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Oliveira:2022:ALS,
  author =       "Lucas {Santos De Oliveira} and Pedro O. S. Vaz-De-Melo
                 and Aline {Carneiro Viana}",
  title =        "Assessing Large-Scale Power Relations among Locations
                 from Mobility Data",
  journal =      j-TKDD,
  volume =       "16",
  number =       "2",
  pages =        "38:1--38:31",
  month =        apr,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3470770",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Tue Sep 14 07:09:40 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3470770",
  abstract =     "The pervasiveness of smartphones has shaped our lives,
                 social norms, and the structure that dictates human
                 behavior. They now directly influence how individuals
                 demand resources or interact with network services.
                 From this scenario, identifying key \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2022:KKR,
  author =       {Zhenyu Zhang and Lei Zhang and Dingqi Yang and Liu
                 Yang},
  title =        {{KRAN}: Knowledge Refining Attention Network for
                 Recommendation},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {2},
  pages =        {39:1--39:20},
  articleno =    {39},
  month =        apr,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3470783},
  URL =          {https://dl.acm.org/doi/10.1145/3470783},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Recommender algorithms combining knowledge graph and
                 graph convolutional network are becoming more and more
                 popular recently. Specifically, attributes describing
                 the items to be recommended are often used as
                 additional information. These attributes along
                 \ldots{}},
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhao:2022:STE,
  author =       {Liang Zhao and Yuyang Gao and Jieping Ye and Feng Chen
                 and Yanfang Ye and Chang-Tien Lu and Naren
                 Ramakrishnan},
  title =        {Spatio-Temporal Event Forecasting Using Incremental
                 Multi-Source Feature Learning},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {2},
  pages =        {40:1--40:28},
  articleno =    {40},
  month =        apr,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3464976},
  URL =          {https://dl.acm.org/doi/10.1145/3464976},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {The forecasting of significant societal events such as
                 civil unrest and economic crisis is an interesting and
                 challenging problem which requires both timeliness,
                 precision, and comprehensiveness. Significant societal
                 events are influenced and indicated \ldots{}},
  bibdate =      {Tue Sep 14 07:09:40 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lei:2022:ODR,
  author =       {Shuo Lei and Xuchao Zhang and Liang Zhao and Arnold P.
                 Boedihardjo and Chang-Tien Lu},
  title =        {Online and Distributed Robust Regressions with
                 Extremely Noisy Labels},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {41:1--41:24},
  articleno =    {41},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3473038},
  URL =          {https://dl.acm.org/doi/10.1145/3473038},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {In today's era of big data, robust least-squares
                 regression becomes a more challenging problem when
                 considering the extremely corrupted labels along with
                 explosive growth of datasets. Traditional robust
                 methods can handle the noise but suffer from several
                 \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Li:2022:KDA,
  author =       {Xingjian Li and Haoyi Xiong and Zeyu Chen and Jun Huan
                 and Ji Liu and Cheng-Zhong Xu and Dejing Dou},
  title =        {Knowledge Distillation with Attention for Deep
                 Transfer Learning of Convolutional Networks},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {42:1--42:20},
  articleno =    {42},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3473912},
  URL =          {https://dl.acm.org/doi/10.1145/3473912},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Transfer learning through fine-tuning a pre-trained
                 neural network with an extremely large dataset, such as
                 ImageNet, can significantly improve and accelerate
                 training while the accuracy is frequently bottlenecked
                 by the limited dataset size of the new \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Nashaat:2022:SSE,
  author =       {Mona Nashaat and Aindrila Ghosh and James Miller and
                 Shaikh Quader},
  title =        {Semi-Supervised Ensemble Learning for Dealing with
                 Inaccurate and Incomplete Supervision},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {43:1--43:33},
  articleno =    {43},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3473910},
  URL =          {https://dl.acm.org/doi/10.1145/3473910},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {In real-world tasks, obtaining a large set of
                 noise-free data can be prohibitively expensive.
                 Therefore, recent research tries to enable machine
                 learning to work with weakly supervised datasets, such
                 as inaccurate or incomplete data. However, the previous
                 \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Shao:2022:NEM,
  author =       "Ping Shao and Yang Yang and Shengyao Xu and Chunping
                 Wang",
  title =        "Network Embedding via Motifs",
  journal =      j-TKDD,
  volume =       "16",
  number =       "3",
  pages =        "44:1--44:20",
  month =        jun,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3473911",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Dec 10 11:04:18 MST 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3473911",
  abstract =     "Network embedding has emerged as an effective way to
                 deal with downstream tasks, such as node classification
                 [16, 31, 42]. Most existing methods leverage
                 multi-similarities between nodes such as connectivity,
                 which considers vertices that are closely \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Kuang:2022:BSS,
  author =       {Kun Kuang and Hengtao Zhang and Runze Wu and Fei Wu
                 and Yueting Zhuang and Aijun Zhang},
  title =        {Balance-Subsampled Stable Prediction Across Unknown
                 Test Data},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {45:1--45:21},
  articleno =    {45},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3477052},
  URL =          {https://dl.acm.org/doi/10.1145/3477052},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {In data mining and machine learning, it is commonly
                 assumed that training and test data share the same
                 population distribution. However, this assumption is
                 often violated in practice because of the sample
                 selection bias, which might induce the \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Chen:2022:DDG,
  author =       {Ling Chen and Xing Tang and Weiqi Chen and Yuntao Qian
                 and Yansheng Li and Yongjun Zhang},
  title =        {{DACHA}: a Dual Graph Convolution Based Temporal
                 Knowledge Graph Representation Learning Method Using
                 Historical Relation},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {46:1--46:18},
  articleno =    {46},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3477051},
  URL =          {https://dl.acm.org/doi/10.1145/3477051},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Temporal knowledge graph (TKG) representation learning
                 embeds relations and entities into a continuous
                 low-dimensional vector space by incorporating temporal
                 information. Latest studies mainly aim at learning
                 entity representations by modeling entity \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Wang:2022:CAS,
  author =       {Huandong Wang and Yong Li and Junjie Lin and Hancheng
                 Cao and Depeng Jin},
  title =        {Context-Aware Semantic Annotation of Mobility
                 Records},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {47:1--47:20},
  articleno =    {47},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3477048},
  URL =          {https://dl.acm.org/doi/10.1145/3477048},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {The wide adoption of mobile devices has provided us
                 with a massive volume of human mobility records.
                 However, a large portion of these records is unlabeled,
                 i.e., only have GPS coordinates without semantic
                 information (e.g., Point of Interest (POI)). To
                 \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Shi:2022:CLC,
  author =       {Tian Shi and Xuchao Zhang and Ping Wang and Chandan K.
                 Reddy},
  title =        {Corpus-level and Concept-based Explanations for
                 Interpretable Document Classification},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {48:1--48:17},
  articleno =    {48},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3477539},
  URL =          {https://dl.acm.org/doi/10.1145/3477539},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Using attention weights to identify information that
                 is important for models' decision making is a popular
                 approach to interpret attention-based neural networks.
                 This is commonly realized in practice through the
                 generation of a heat-map for every single \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Feng:2022:CAS,
  author =       {Jie Feng and Yong Li and Ziqian Lin and Can Rong and
                 Funing Sun and Diansheng Guo and Depeng Jin},
  title =        {Context-aware Spatial-Temporal Neural Network for
                 Citywide Crowd Flow Prediction via Modeling Long-range
                 Spatial Dependency},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {49:1--49:21},
  articleno =    {49},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3477577},
  URL =          {https://dl.acm.org/doi/10.1145/3477577},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Crowd flow prediction is of great importance in a wide
                 range of applications from urban planning, traffic
                 control to public safety. It aims at predicting the
                 inflow (the traffic of crowds entering a region in a
                 given time interval) and outflow (the \ldots{})},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhou:2022:UAN,
  author =       {Yang Zhou and Jiaxiang Ren and Ruoming Jin and Zijie
                 Zhang and Jingyi Zheng and Zhe Jiang and Da Yan and
                 Dejing Dou},
  title =        {Unsupervised Adversarial Network Alignment with
                 Reinforcement Learning},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {50:1--50:29},
  articleno =    {50},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3477050},
  URL =          {https://dl.acm.org/doi/10.1145/3477050},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Network alignment, which aims at learning a matching
                 between the same entities across multiple information
                 networks, often suffers challenges from feature
                 inconsistency, high-dimensional features, to unstable
                 alignment results. This article presents a \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Wu:2022:NMN,
  author =       {Youxi Wu and Lanfang Luo and Yan Li and Lei Guo and
                 Philippe Fournier-Viger and Xingquan Zhu and Xindong
                 Wu},
  title =        {{NTP-Miner}: Nonoverlapping Three-Way Sequential
                 Pattern Mining},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {51:1--51:21},
  articleno =    {51},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3480245},
  URL =          {https://dl.acm.org/doi/10.1145/3480245},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Nonoverlapping sequential pattern mining is an
                 important type of sequential pattern mining (SPM) with
                 gap constraints, which not only can reveal interesting
                 patterns to users but also can effectively reduce the
                 search space using the Apriori (anti-\ldots{})},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Jiang:2022:NPO,
  author =       {Yuanchun Jiang and Ruicheng Liang and Ji Zhang and
                 Jianshan Sun and Yezheng Liu and Yang Qian},
  title =        {Network Public Opinion Detection During the
                 Coronavirus Pandemic: a Short-Text Relational Topic
                 Model},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {52:1--52:27},
  articleno =    {52},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3480246},
  URL =          {https://dl.acm.org/doi/10.1145/3480246},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Online social media provides rich and varied
                 information reflecting the significant concerns of the
                 public during the coronavirus pandemic. Analyzing what
                 the public is concerned with from social media
                 information can support policy-makers to maintain the
                 \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Sun:2022:GCI,
  author =       {Heli Sun and Yang Li and Bing Lv and Wujie Yan and
                 Liang He and Shaojie Qiao and Jianbin Huang},
  title =        {{Graph Community Infomax}},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {53:1--53:21},
  articleno =    {53},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3480244},
  URL =          {https://dl.acm.org/doi/10.1145/3480244},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Graph representation learning aims at learning
                 low-dimension representations for nodes in graphs, and
                 has been proven very useful in several downstream
                 tasks. In this article, we propose a new model, Graph
                 Community Infomax (GCI), that can adversarial
                 \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Liu:2022:SGQ,
  author =       {Guliu Liu and Lei Li and Guanfeng Liu and Xindong Wu},
  title =        {Social Group Query Based on Multi-Fuzzy-Constrained
                 Strong Simulation},
  journal =      j-TKDD,
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  volume =       {16},
  number =       {3},
  pages =        {54:1--54:27},
  articleno =    {54},
  month =        jun,
  year =         {2022},
  DOI =          {https://doi.org/10.1145/3481640},
  URL =          {https://dl.acm.org/doi/10.1145/3481640},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  CODEN =        {????},
  abstract =     {Traditional social group analysis mostly uses
                 interaction models, event models, or other social
                 network analysis methods to identify and distinguish
                 groups. This type of method can divide social
                 participants into different groups based on their
                 geographic \ldots{}},
  bibdate =      {Fri Dec 10 11:04:18 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Liang:2022:NFB,
  author          = {Shangsong Liang and Zhuo Ouyang and Zaiqiao Meng},
  title           = {A Normalizing Flow-Based Co-Embedding Model for
                     Attributed Networks},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = jun,
  volume          = {16},
  number          = {3},
  articleno       = {55},
  pages           = {55:1--55:31},
  doi             = {https://doi.org/10.1145/3477049},
  url             = {https://dl.acm.org/doi/10.1145/3477049},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Network embedding is a technique that aims at
                     inferring the low-dimensional representations of nodes
                     in a semantic space. In this article, we study the
                     problem of inferring the low-dimensional
                     representations of both nodes and attributes for
                     attributed \ldots{}},
  bibdate         = {Fri Dec 10 11:04:18 MST 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Xu:2022:TAG,
  author          = {Yonghui Xu and Shengjie Sun and Huiguo Zhang and
                     Chang'an Yi and Yuan Miao and Dong Yang and Xiaonan
                     Meng and Yi Hu and Ke Wang and Huaqing Min and Hengjie
                     Song and Chuanyan Miao},
  title           = {Time-Aware Graph Embedding: a Temporal Smoothness and
                     Task-Oriented Approach},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = jun,
  volume          = {16},
  number          = {3},
  articleno       = {56},
  pages           = {56:1--56:23},
  doi             = {https://doi.org/10.1145/3480243},
  url             = {https://dl.acm.org/doi/10.1145/3480243},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Knowledge graph embedding, which aims at learning the
                     low-dimensional representations of entities and
                     relationships, has attracted considerable research
                     efforts recently. However, most knowledge graph
                     embedding methods focus on the structural \ldots{}},
  bibdate         = {Fri Dec 10 11:04:18 MST 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Sowah:2022:HEH,
  author          = {Robert A. Sowah and Bernard Kuditchar and Godfrey A.
                     Mills and Amevi Acakpovi and Raphael A. Twum and Gifty
                     Buah and Robert Agboyi},
  title           = {{HCBST}: an Efficient Hybrid Sampling Technique for
                     Class Imbalance Problems},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = jun,
  volume          = {16},
  number          = {3},
  articleno       = {57},
  pages           = {57:1--57:37},
  doi             = {https://doi.org/10.1145/3488280},
  url             = {https://dl.acm.org/doi/10.1145/3488280},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Class imbalance problem is prevalent in many
                     real-world domains. It has become an active area of
                     research. In binary classification problems, imbalance
                     learning refers to learning from a dataset with a high
                     degree of skewness to the negative class. This
                     \ldots{}},
  bibdate         = {Fri Dec 10 11:04:18 MST 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Jin:2022:TUE,
  author          = {Junchen Jin and Mark Heimann and Di Jin and Danai
                     Koutra},
  title           = {Toward Understanding and Evaluating Structural Node
                     Embeddings},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = jun,
  volume          = {16},
  number          = {3},
  articleno       = {58},
  pages           = {58:1--58:32},
  doi             = {https://doi.org/10.1145/3481639},
  url             = {https://dl.acm.org/doi/10.1145/3481639},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {While most network embedding techniques model the
                     proximity between nodes in a network, recently there
                     has been significant interest in structural embeddings
                     that are based on node equivalences, a notion rooted in
                     sociology: equivalences or positions are \ldots{}},
  bibdate         = {Fri Dec 10 11:04:18 MST 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Guo:2022:DFE,
  author          = {Mengzhuo Guo and Zhongzhi Xu and Qingpeng Zhang and
                     Xiuwu Liao and Jiapeng Liu},
  title           = {Deciphering Feature Effects on Decision-Making in
                     Ordinal Regression Problems: an Explainable Ordinal
                     Factorization Model},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = jun,
  volume          = {16},
  number          = {3},
  articleno       = {59},
  pages           = {59:1--59:26},
  doi             = {https://doi.org/10.1145/3487048},
  url             = {https://dl.acm.org/doi/10.1145/3487048},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Ordinal regression predicts the objects' labels that
                     exhibit a natural ordering, which is vital to
                     decision-making problems such as credit scoring and
                     clinical diagnosis. In these problems, the ability to
                     explain how the individual features and their
                     \ldots{}},
  bibdate         = {Fri Dec 10 11:04:18 MST 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Lin:2022:SMH,
  author          = {Jerry Chun-Wei Lin and Youcef Djenouri and Gautam
                     Srivastava and Yuanfa Li and Philip S. Yu},
  title           = {Scalable Mining of High-Utility Sequential Patterns
                     With Three-Tier {MapReduce} Model},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = jun,
  volume          = {16},
  number          = {3},
  articleno       = {60},
  pages           = {60:1--60:26},
  doi             = {https://doi.org/10.1145/3487046},
  url             = {https://dl.acm.org/doi/10.1145/3487046},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {High-utility sequential pattern mining (HUSPM) is a
                     hot research topic in recent decades since it combines
                     both sequential and utility properties to reveal more
                     information and knowledge rather than the traditional
                     frequent itemset mining or sequential \ldots{}},
  bibdate         = {Fri Dec 10 11:04:18 MST 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Gupta:2022:CDL,
  author          = {Manish Gupta and Puneet Agrawal},
  title           = {Compression of Deep Learning Models for Text: a
                     Survey},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {61},
  pages           = {61:1--61:55},
  doi             = {https://doi.org/10.1145/3487045},
  url             = {https://dl.acm.org/doi/10.1145/3487045},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In recent years, the fields of natural language
                     processing (NLP) and information retrieval (IR) have
                     made tremendous progress thanks to deep learning models
                     like Recurrent Neural Networks (RNNs), Gated Recurrent
                     Units (GRUs) and Long Short-Term Memory \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Liu:2022:FMM,
  author          = {Chang Liu and Jie Yan and Feiyue Guo and Min Guo},
  title           = {Forecasting the Market with Machine Learning
                     Algorithms: an Application of {NMC-BERT-LSTM-DQN-X}
                     Algorithm in Quantitative Trading},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {62},
  pages           = {62:1--62:22},
  doi             = {https://doi.org/10.1145/3488378},
  url             = {https://dl.acm.org/doi/10.1145/3488378},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Although machine learning (ML) algorithms have been
                     widely used in forecasting the trend of stock market
                     indices, they failed to consider the following crucial
                     aspects for market forecasting: (1) that investors'
                     emotions and attitudes toward future market \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% The title separates the acronym from its expansion with an em dash
%%% (---); a single hyphen would typeset as the compound "RHPTree-Risk".
@Article{Liu:2022:RRH,
  author =       "Danlu Liu and Yu Li and William Baskett and Dan Lin
                 and Chi-Ren Shyu",
  title =        "{RHPTree}---Risk Hierarchical Pattern Tree for
                 Scalable Long Pattern Mining",
  journal =      j-TKDD,
  volume =       "16",
  number =       "4",
  pages =        "63:1--63:33",
  month =        aug,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3488380",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Jan 14 06:33:11 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3488380",
  abstract =     "Risk patterns are crucial in biomedical research and
                 have served as an important factor in precision health
                 and disease prevention. Despite recent development in
                 parallel and high-performance computing, existing risk
                 pattern mining methods still struggle \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "63",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ma:2022:MIF,
  author          = {Muyang Ma and Pengjie Ren and Zhumin Chen and Zhaochun
                     Ren and Lifan Zhao and Peiyu Liu and Jun Ma and Maarten
                     de Rijke},
  title           = {Mixed Information Flow for Cross-Domain Sequential
                     Recommendations},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {64},
  pages           = {64:1--64:32},
  doi             = {https://doi.org/10.1145/3487331},
  url             = {https://dl.acm.org/doi/10.1145/3487331},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Cross-domain sequential recommendation is the task of
                     predict the next item that the user is most likely to
                     interact with based on past sequential behavior from
                     multiple domains. One of the key challenges in
                     cross-domain sequential recommendation is to \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Fu:2022:TTR,
  author          = {Zhe Fu and Li Yu and Xi Niu},
  title           = {{TRACE}: Travel Reinforcement Recommendation Based on
                     Location-Aware Context Extraction},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {65},
  pages           = {65:1--65:22},
  doi             = {https://doi.org/10.1145/3487047},
  url             = {https://dl.acm.org/doi/10.1145/3487047},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {As the popularity of online travel platforms
                     increases, users tend to make ad-hoc decisions on
                     places to visit rather than preparing the detailed tour
                     plans in advance. Under the situation of timeliness and
                     uncertainty of users' demand, how to integrate
                     \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Yu:2022:CFS,
  author          = {Kui Yu and Yajing Yang and Wei Ding},
  title           = {Causal Feature Selection with Missing Data},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {66},
  pages           = {66:1--66:24},
  doi             = {https://doi.org/10.1145/3488055},
  url             = {https://dl.acm.org/doi/10.1145/3488055},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Causal feature selection aims at learning the Markov
                     blanket (MB) of a class variable for feature selection.
                     The MB of a class variable implies the local causal
                     structure among the class variable and its MB and all
                     other features are probabilistically \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Gao:2022:TES,
  author          = {Fei Gao and Jiada Li and Yisu Ge and Jianwen Shao and
                     Shufang Lu and Libo Weng},
  title           = {A Trajectory Evaluator by Sub-tracks for Detecting
                     {VOT}-based Anomalous Trajectory},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {67},
  pages           = {67:1--67:19},
  doi             = {https://doi.org/10.1145/3490032},
  url             = {https://dl.acm.org/doi/10.1145/3490032},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {With the popularization of visual object tracking
                     (VOT), more and more trajectory data are obtained and
                     have begun to gain widespread attention in the fields
                     of mobile robots, intelligent video surveillance, and
                     the like. How to clean the anomalous \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Jafariakinabad:2022:SSR,
  author          = {Fereshteh Jafariakinabad and Kien A. Hua},
  title           = {A Self-Supervised Representation Learning of Sentence
                     Structure for Authorship Attribution},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {68},
  pages           = {68:1--68:16},
  doi             = {https://doi.org/10.1145/3491203},
  url             = {https://dl.acm.org/doi/10.1145/3491203},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {The syntactic structure of sentences in a document
                     substantially informs about its authorial writing
                     style. Sentence representation learning has been widely
                     explored in recent years and it has been shown that it
                     improves the generalization of different \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Xu:2022:PPM,
  author          = {Honghui Xu and Zhipeng Cai and Wei Li},
  title           = {Privacy-Preserving Mechanisms for Multi-Label Image
                     Recognition},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {69},
  pages           = {69:1--69:21},
  doi             = {https://doi.org/10.1145/3491231},
  url             = {https://dl.acm.org/doi/10.1145/3491231},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Multi-label image recognition has been an
                     indispensable fundamental component for many real
                     computer vision applications. However, a severe threat
                     of privacy leakage in multi-label image recognition has
                     been overlooked by existing studies. To fill this
                     \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Abulaish:2022:DSK,
  author          = {Muhammad Abulaish and Mohd Fazil and Mohammed J.
                     Zaki},
  title           = {Domain-Specific Keyword Extraction Using Joint
                     Modeling of Local and Global Contextual Semantics},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {70},
  pages           = {70:1--70:30},
  doi             = {https://doi.org/10.1145/3494560},
  url             = {https://dl.acm.org/doi/10.1145/3494560},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Domain-specific keyword extraction is a vital task in
                     the field of text mining. There are various research
                     tasks, such as spam e-mail classification, abusive
                     language detection, sentiment analysis, and emotion
                     mining, where a set of domain-specific \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Yuan:2022:AMS,
  author          = {Mu Yuan and Lan Zhang and Xiang-Yang Li and Lin-Zhuo
                     Yang and Hui Xiong},
  title           = {Adaptive Model Scheduling for Resource-efficient Data
                     Labeling},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {71},
  pages           = {71:1--71:22},
  doi             = {https://doi.org/10.1145/3494559},
  url             = {https://dl.acm.org/doi/10.1145/3494559},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Labeling data (e.g., labeling the people, objects,
                     actions, and scene in images) comprehensively and
                     efficiently is a widely needed but challenging task.
                     Numerous models were proposed to label various data and
                     many approaches were designed to enhance the \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Zhang:2022:DEL,
  author          = {Min-Ling Zhang and Jing-Han Wu and Wei-Xuan Bao},
  title           = {Disambiguation Enabled Linear Discriminant Analysis
                     for Partial Label Dimensionality Reduction},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {72},
  pages           = {72:1--72:18},
  doi             = {https://doi.org/10.1145/3494565},
  url             = {https://dl.acm.org/doi/10.1145/3494565},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {As an emerging weakly supervised learning framework,
                     partial label learning considers inaccurate supervision
                     where each training example is associated with multiple
                     candidate labels among which only one is valid. In this
                     article, a first attempt toward \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Huang:2022:AMP,
  author          = {Chenji Huang and Yixiang Fang and Xuemin Lin and Xin
                     Cao and Wenjie Zhang},
  title           = {{ABLE}: Meta-Path Prediction in Heterogeneous
                     Information Networks},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {73},
  pages           = {73:1--73:21},
  doi             = {https://doi.org/10.1145/3494558},
  url             = {https://dl.acm.org/doi/10.1145/3494558},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Given a heterogeneous information network (HIN) H, a
                     head node h, a meta-path P, and a tail node t, the
                     meta-path prediction aims at predicting whether h can
                     be linked to t by an instance of P. Most existing
                     solutions either require predefined meta-paths,
                     \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Yuan:2022:AIC,
  author          = {Junkun Yuan and Anpeng Wu and Kun Kuang and Bo Li and
                     Runze Wu and Fei Wu and Lanfen Lin},
  title           = {{Auto IV}: Counterfactual Prediction via Automatic
                     Instrumental Variable Decomposition},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {74},
  pages           = {74:1--74:20},
  doi             = {https://doi.org/10.1145/3494568},
  url             = {https://dl.acm.org/doi/10.1145/3494568},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Instrumental variables (IVs), sources of treatment
                     randomization that are conditionally independent of the
                     outcome, play an important role in causal inference
                     with unobserved confounders. However, the existing
                     IV-based counterfactual prediction methods \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Bhatia:2022:RTA,
  author          = {Siddharth Bhatia and Rui Liu and Bryan Hooi and Minji
                     Yoon and Kijung Shin and Christos Faloutsos},
  title           = {Real-Time Anomaly Detection in Edge Streams},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {75},
  pages           = {75:1--75:22},
  doi             = {https://doi.org/10.1145/3494564},
  url             = {https://dl.acm.org/doi/10.1145/3494564},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Given a stream of graph edges from a dynamic graph,
                     how can we assign anomaly scores to edges in an online
                     manner, for the purpose of detecting unusual behavior,
                     using constant time and memory? Existing approaches aim
                     to detect individually surprising. \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Sheshbolouki:2022:SBA,
  author          = {Aida Sheshbolouki and M. Tamer {\"O}zsu},
  title           = {{sGrapp}: Butterfly Approximation in Streaming
                     Graphs},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {76},
  pages           = {76:1--76:43},
  doi             = {https://doi.org/10.1145/3495011},
  url             = {https://dl.acm.org/doi/10.1145/3495011},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {We study the fundamental problem of butterfly (i.e.,
                     (2,2)-bicliques) counting in bipartite streaming
                     graphs. Similar to triangles in unipartite graphs,
                     enumerating butterflies is crucial in understanding the
                     structure of bipartite graphs. This benefits \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Wu:2022:MGL,
  author          = {Hanrui Wu and Michael K. Ng},
  title           = {Multiple Graphs and Low-Rank Embedding for
                     Multi-Source Heterogeneous Domain Adaptation},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {77},
  pages           = {77:1--77:25},
  doi             = {https://doi.org/10.1145/3492804},
  url             = {https://dl.acm.org/doi/10.1145/3492804},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Multi-source domain adaptation is a challenging topic
                     in transfer learning, especially when the data of each
                     domain are represented by different kinds of features,
                     i.e., Multi-source Heterogeneous Domain Adaptation
                     (MHDA). It is important to take \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Prokhorenkova:2022:WLM,
  author          = {Liudmila Prokhorenkova and Alexey Tikhonov and Nelly
                     Litvak},
  title           = {When Less Is More: Systematic Analysis of
                     Cascade-Based Community Detection},
  journal         = j-TKDD,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  year            = {2022},
  month           = aug,
  volume          = {16},
  number          = {4},
  articleno       = {78},
  pages           = {78:1--78:22},
  doi             = {https://doi.org/10.1145/3494563},
  url             = {https://dl.acm.org/doi/10.1145/3494563},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Information diffusion, spreading of infectious
                     diseases, and spreading of rumors are fundamental
                     processes occurring in real-life networks. In many
                     practical cases, one can observe when nodes become
                     infected, but the underlying network, over which a
                     \ldots{}},
  bibdate         = {Fri Jan 14 06:33:11 MST 2022},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Yang:2022:DTA,
  author = {Xu Yang and Chao Song and Mengdi Yu and Jiqing Gu and
    Ming Liu},
  title = {Distributed Triangle Approximately Counting Algorithms
    in Simple Graph Stream},
  journal = j-TKDD,
  volume = {16},
  number = {4},
  pages = {79:1--79:43},
  articleno = {79},
  month = aug,
  year = {2022},
  doi = {https://doi.org/10.1145/3494562},
  url = {https://dl.acm.org/doi/10.1145/3494562},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Recently, the counting algorithm of local topology
    structures, such as triangles, has been widely used in
    social network analysis, recommendation systems, user
    portraits and other fields. At present, the problem of
    counting global and local triangles in a \ldots{}},
  bibdate = {Fri Jan 14 06:33:11 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Wu:2022:HCN,
  author = {Hanrui Wu and Michael K. Ng},
  title = {Hypergraph Convolution on Nodes-Hyperedges Network for
    Semi-Supervised Node Classification},
  journal = j-TKDD,
  volume = {16},
  number = {4},
  pages = {80:1--80:19},
  articleno = {80},
  month = aug,
  year = {2022},
  doi = {https://doi.org/10.1145/3494567},
  url = {https://dl.acm.org/doi/10.1145/3494567},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Hypergraphs have shown great power in representing
    high-order relations among entities, and lots of
    hypergraph-based deep learning methods have been
    proposed to learn informative data representations for
    the node classification problem. However, most of
    \ldots{}},
  bibdate = {Fri Jan 14 06:33:11 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Shen:2022:MTM,
  author = {Yanyan Shen and Baoyuan Ou and Ranzhen Li},
  title = {{MBN}: Towards Multi-Behavior Sequence Modeling for
    Next Basket Recommendation},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {81:1--81:23},
  articleno = {81},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3497748},
  url = {https://dl.acm.org/doi/10.1145/3497748},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Next basket recommendation aims at predicting the next
    set of items that a user would likely purchase
    together, which plays an important role in e-commerce
    platforms. Unlike conventional item recommendation, the
    next basket recommendation focuses on \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Ke:2022:MRG,
  author = {Xiangyu Ke and Arijit Khan and Francesco Bonchi},
  title = {Multi-relation Graph Summarization},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {82:1--82:30},
  articleno = {82},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3494561},
  url = {https://dl.acm.org/doi/10.1145/3494561},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Graph summarization is beneficial in a wide range of
    applications, such as visualization, interactive and
    exploratory analysis, approximate query processing,
    reducing the on-disk storage footprint, and graph
    processing in modern hardware. However, the \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Chen:2022:OLB,
  author =       "Weirong Chen and Jiaqi Zheng and Haoyu Yu and Guihai
                 Chen and Yixin Chen and Dongsheng Li",
  title =        "Online Learning Bipartite Matching with Non-stationary
                 Distributions",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "83:1--83:22",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3502734",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3502734",
  abstract =     "Online bipartite matching has attracted wide interest
                 since it can successfully model the popular online
                 car-hailing problem and sharing economy. Existing works
                 consider this problem under either adversary setting or
                 i.i.d.\ setting. The former is too \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "83",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Ihou:2022:SVO,
  author = {Koffi Eddy Ihou and Manar Amayri and Nizar Bouguila},
  title = {Stochastic Variational Optimization of a Hierarchical
    {Dirichlet} Process Latent Beta-{Liouville} Topic
    Model},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {84:1--84:48},
  articleno = {84},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502727},
  url = {https://dl.acm.org/doi/10.1145/3502727},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {In topic models, collections are organized as
    documents where they arise as mixtures over latent
    clusters called topics. A topic is a distribution over
    the vocabulary. In large-scale applications, parametric
    or finite topic mixture models such as LDA \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Davvetas:2022:ETL,
  author = {Athanasios Davvetas and Iraklis A. Klampanos and
    Spiros Skiadopoulos and Vangelis Karkaletsis},
  title = {Evidence Transfer: Learning Improved Representations
    According to External Heterogeneous Task Outcomes},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {85:1--85:22},
  articleno = {85},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502732},
  url = {https://dl.acm.org/doi/10.1145/3502732},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Unsupervised representation learning tends to produce
    generic and reusable latent representations. However,
    these representations can often miss high-level
    features or semantic information, since they only
    observe the implicit properties of the dataset.
    \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Varde:2022:CES,
  author = {Aparna S. Varde},
  title = {Computational Estimation by Scientific Data Mining
    with Classical Methods to Automate Learning Strategies
    of Scientists},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {86:1--86:52},
  articleno = {86},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502736},
  url = {https://dl.acm.org/doi/10.1145/3502736},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Experimental results are often plotted as
    2-dimensional graphical plots (aka graphs) in
    scientific domains depicting dependent versus
    independent variables to aid visual analysis of
    processes. Repeatedly performing laboratory experiments
    consumes \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhou:2022:OSS,
  author = {Peng Zhou and Shu Zhao and Yuanting Yan and Xindong
    Wu},
  title = {Online Scalable Streaming Feature Selection via
    Dynamic Decision},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {87:1--87:20},
  articleno = {87},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502737},
  url = {https://dl.acm.org/doi/10.1145/3502737},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Feature selection is one of the core concepts in
    machine learning, which hugely impacts the model's
    performance. For some real-world applications, features
    may exist in a stream mode that arrives one by one over
    time, while we cannot know the exact number \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Pooja:2022:EHO,
  author = {Km Pooja and Samrat Mondal and Joydeep Chandra},
  title = {Exploiting Higher Order Multi-dimensional
    Relationships with Self-attention for Author Name
    Disambiguation},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {88:1--88:23},
  articleno = {88},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502730},
  url = {https://dl.acm.org/doi/10.1145/3502730},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Name ambiguity is a prevalent problem in scholarly
    publications due to the unprecedented growth of digital
    libraries and number of researchers. An author is
    identified by their name in the absence of a unique
    identifier. The documents of an author are \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Pei:2022:BHB,
  author = {Shuyu Pei and Kun Xie and Xin Wang and Gaogang Xie and
    Kenli Li and Wei Li and Yanbiao Li and Jigang Wen},
  title = {{B$_h$BF}: a {Bloom} Filter Using {B$_h$} Sequences
    for Multi-set Membership Query},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {89:1--89:26},
  articleno = {89},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502735},
  url = {https://dl.acm.org/doi/10.1145/3502735},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Multi-set membership query is a fundamental issue for
    network functions such as packet processing and state
    machines monitoring. Given the rigid query speed and
    memory requirements, it would be promising if a
    multi-set query algorithm can be designed \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Chen:2022:DED,
  author = {Ling Chen and Hongyu Shi},
  title = {{DexDeepFM}: Ensemble Diversity Enhanced Extreme Deep
    Factorization Machine Model},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {90:1--90:17},
  articleno = {90},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3505272},
  url = {https://dl.acm.org/doi/10.1145/3505272},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Predicting user positive response (e.g., purchases and
    clicks) probability is a critical task in Web
    applications. To identify predictive features from raw
    data, the state-of-the-art extreme deep factorization
    machine model (xDeepFM) introduces a new \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Singh:2022:MLD,
  author = {Shikha Singh and Emilie Chouzenoux and Giovanni
    Chierchia and Angshul Majumdar},
  title = {Multi-label Deep Convolutional Transform Learning for
    Non-intrusive Load Monitoring},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {91:1--91:6},
  articleno = {91},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502729},
  url = {https://dl.acm.org/doi/10.1145/3502729},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {The objective of this letter is to propose a novel
    computational method to learn the state of an appliance
    (ON / OFF) given the aggregate power consumption
    recorded by the smart-meter. We formulate a multi-label
    classification problem where the classes \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Sato:2022:CTG,
  author = {Ryoma Sato and Makoto Yamada and Hisashi Kashima},
  title = {Constant Time Graph Neural Networks},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {92:1--92:31},
  articleno = {92},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502733},
  url = {https://dl.acm.org/doi/10.1145/3502733},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {The recent advancements in graph neural networks
    (GNNs) have led to state-of-the-art performances in
    various applications, including chemo-informatics,
    question-answering systems, and recommender systems.
    However, scaling up these methods to huge graphs,
    \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Ling:2022:PAP,
  author = {Zhaolong Ling and Kui Yu and Lin Liu and Jiuyong Li
    and Yiwen Zhang and Xindong Wu},
  title = {{PSL}: an Algorithm for Partial {Bayesian} Network
    Structure Learning},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {93:1--93:25},
  articleno = {93},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3508071},
  url = {https://dl.acm.org/doi/10.1145/3508071},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Learning partial Bayesian network (BN) structure is an
    interesting and challenging problem. In this challenge,
    it is computationally expensive to use global BN
    structure learning algorithms, while only one part of a
    BN structure is interesting, local BN \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Sharma:2022:IDA,
  author =       "Promila Sharma and Uma Meena and Girish Kumar
                 Sharma",
  title =        "Intelligent Data Analysis using Optimized Support
                 Vector Machine Based Data Mining Approach for Tourism
                 Industry",
  journal =      j-TKDD,
  volume =       "16",
  number =       "5",
  pages =        "94:1--94:20",
  month =        oct,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3494566",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed May 25 07:43:38 MDT 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3494566",
  abstract =     "Data analysis involves the deployment of sophisticated
                 approaches from data mining methods, information
                 theory, and artificial intelligence in various fields
                 like tourism, hospitality, and so on for the extraction
                 of knowledge from the gathered and \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "94",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Huai:2022:RML,
  author = {Mengdi Huai and Tianhang Zheng and Chenglin Miao and
    Liuyi Yao and Aidong Zhang},
  title = {On the Robustness of Metric Learning: an Adversarial
    Perspective},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {95:1--95:25},
  articleno = {95},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502726},
  url = {https://dl.acm.org/doi/10.1145/3502726},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Metric learning aims at automatically learning a
    distance metric from data so that the precise
    similarity between data instances can be faithfully
    reflected, and its importance has long been recognized
    in many fields. An implicit assumption in existing
    \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Qiu:2022:GNN,
  author = {Zhaopeng Qiu and Yunfan Hu and Xian Wu},
  title = {Graph Neural News Recommendation with User Existing
    and Potential Interest Modeling},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {96:1--96:17},
  articleno = {96},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3511708},
  url = {https://dl.acm.org/doi/10.1145/3511708},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Personalized news recommendations can alleviate the
    information overload problem. To enable personalized
    recommendation, one critical step is to learn a
    comprehensive user representation to model her/his
    interests. Many existing works learn user \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Goel:2022:QIP,
  author = {Kanika Goel and Sander J. J. Leemans and Niels Martin
    and Moe T. Wynn},
  title = {Quality-Informed Process Mining: a Case for
    Standardised Data Quality Annotations},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {97:1--97:47},
  articleno = {97},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3511707},
  url = {https://dl.acm.org/doi/10.1145/3511707},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Real-life event logs, reflecting the actual executions
    of complex business processes, are faced with numerous
    data quality issues. Extensive data sanity checks and
    pre-processing are usually needed before historical
    data can be used as input to obtain \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Liu:2022:WWW,
  author = {Hao Liu and Qingyu Guo and Hengshu Zhu and Fuzhen
    Zhuang and Shenwen Yang and Dejing Dou and Hui Xiong},
  title = {Who will Win the Data Science Competition? Insights
    from {KDD Cup 2019} and Beyond},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {98:1--98:24},
  articleno = {98},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3511896},
  url = {https://dl.acm.org/doi/10.1145/3511896},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Data science competitions are becoming increasingly
    popular for enterprises collecting advanced innovative
    solutions and allowing contestants to sharpen their
    data science skills. Most existing studies about data
    science competitions have a focus on \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Oliveira:2022:AMT,
  author = {Saullo H. G. Oliveira and Andr{\'e} R. Gon{\c{c}}alves
    and Fernando J. {Von Zuben}},
  title = {Asymmetric Multi-Task Learning with Local
    Transference},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {99:1--99:30},
  articleno = {99},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3514252},
  url = {https://dl.acm.org/doi/10.1145/3514252},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {In this article, we present the Group Asymmetric
    Multi-Task Learning (GAMTL) algorithm that
    automatically learns from data how tasks transfer
    information among themselves at the level of a subset
    of features. In practice, for each group of features
    GAMTL \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Zhong:2022:CFC,
  author = {Sheng Zhong and Vinicius M. A. Souza and Abdullah
    Mueen},
  title = {Combining Filtering and Cross-Correlation Efficiently
    for Streaming Time Series},
  journal = j-TKDD,
  volume = {16},
  number = {5},
  pages = {100:1--100:24},
  articleno = {100},
  month = oct,
  year = {2022},
  doi = {https://doi.org/10.1145/3502738},
  url = {https://dl.acm.org/doi/10.1145/3502738},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Monitoring systems have hundreds or thousands of
    distributed sensors gathering and transmitting
    real-time streaming data. The early detection of events
    in these systems, such as an earthquake in a seismic
    monitoring system, is the base for essential tasks
    \ldots{}},
  bibdate = {Wed May 25 07:43:38 MDT 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Jirina:2022:DFO,
  author = {Marcel Jirina and Said Krayem},
  title = {The Distance Function Optimization for the Near
    Neighbors-Based Classifiers},
  journal = j-TKDD,
  volume = {16},
  number = {6},
  pages = {101:1--101:??},
  articleno = {101},
  month = dec,
  year = {2022},
  doi = {https://doi.org/10.1145/3434769},
  url = {https://dl.acm.org/doi/10.1145/3434769},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {Based on the analysis of conditions for a good
    distance function we found four rules that should be
    fulfilled. Then, we introduce two new distance
    functions, a metric and a pseudometric one. We have
    tested how they fit for distance-based classifiers,
    \ldots{}},
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@article{Tey:2022:MLB,
  author = {Fu Jie Tey and Tin-Yu Wu and Jiann-Liang Chen},
  title = {Machine Learning-based Short-term Rainfall Prediction
    from Sky Data},
  journal = j-TKDD,
  volume = {16},
  number = {6},
  pages = {102:1--102:??},
  articleno = {102},
  month = dec,
  year = {2022},
  doi = {https://doi.org/10.1145/3502731},
  url = {https://dl.acm.org/doi/10.1145/3502731},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  coden = {????},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
  abstract = {To predict rainfall, our proposed model architecture
    combines the Convolutional Neural Network (CNN), which
    uses the ResNet-152 pre-training model, with the
    Recurrent Neural Network (RNN), which uses the Long
    Short-term Memory Network (LSTM) layer, for \ldots{}},
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Mahmoud:2022:MOL,
  author = {Reem A. Mahmoud and Hazem Hajj},
  title = {Multi-objective Learning to Overcome Catastrophic
    Forgetting in Time-series Applications},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {103},
  pages = {103:1--103:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3502728},
  URL = {https://dl.acm.org/doi/10.1145/3502728},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {One key objective of artificial intelligence involves
    the continuous adaptation of machine learning models to
    new tasks. This branch of continual learning is also
    referred to as lifelong learning (LL), where a major
    challenge is to minimize catastrophic \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Wang:2022:GES,
  author = {Zhaobo Wang and Yanmin Zhu and Qiaomei Zhang and
    Haobing Liu and Chunyang Wang and Tong Liu},
  title = {Graph-Enhanced Spatial-Temporal Network for Next {POI}
    Recommendation},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {104},
  pages = {104:1--104:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3513092},
  URL = {https://dl.acm.org/doi/10.1145/3513092},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {The task of next Point-of-Interest (POI)
    recommendation aims at recommending a list of POIs for
    a user to visit at the next timestamp based on his/her
    previous interactions, which is valuable for both
    location-based service providers and users. Recent
    \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Tipirneni:2022:SST,
  author = {Sindhu Tipirneni and Chandan K. Reddy},
  title = {Self-Supervised Transformer for Sparse and Irregularly
    Sampled Multivariate Clinical Time-Series},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {105},
  pages = {105:1--105:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3516367},
  URL = {https://dl.acm.org/doi/10.1145/3516367},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Multivariate time-series data are frequently observed
    in critical care settings and are typically
    characterized by sparsity (missing information) and
    irregular time intervals. Existing approaches for
    learning representations in this domain handle these
    \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Gu:2022:IFS,
  author = {Shilin Gu and Yuhua Qian and Chenping Hou},
  title = {Incremental Feature Spaces Learning with Label
    Scarcity},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {106},
  pages = {106:1--106:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3516368},
  URL = {https://dl.acm.org/doi/10.1145/3516368},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Recently, learning and mining from data streams with
    incremental feature spaces have attracted extensive
    attention, where data may dynamically expand over time
    in both volume and feature dimensions. Existing
    approaches usually assume that the incoming \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Li:2022:DME,
  author = {Zhe Li and Chunhua Sun and Chunli Liu and Xiayu Chen
    and Meng Wang and Yezheng Liu},
  title = {Dual-{MGAN}: an Efficient Approach for Semi-supervised
    Outlier Detection with Few Identified Anomalies},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {107},
  pages = {107:1--107:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3522690},
  URL = {https://dl.acm.org/doi/10.1145/3522690},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Outlier detection is an important task in data mining,
    and many technologies for it have been explored in
    various applications. However, owing to the default
    assumption that outliers are not concentrated,
    unsupervised outlier detection may not correctly
    \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Wang:2022:NNE,
  author = {Yu Wang and Hanghang Tong and Ziye Zhu and Yun Li},
  title = {Nested Named Entity Recognition: a Survey},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {108},
  pages = {108:1--108:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3522593},
  URL = {https://dl.acm.org/doi/10.1145/3522593},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {With the rapid development of text mining, many
    studies observe that text generally contains a variety
    of implicit information, and it is important to develop
    techniques for extracting such information. Named
    Entity Recognition (NER), the first step of \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Xiao:2022:TQI,
  author = {Houping Xiao and Shiyu Wang},
  title = {Toward Quality of Information Aware Distributed
    Machine Learning},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {109},
  pages = {109:1--109:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3522591},
  URL = {https://dl.acm.org/doi/10.1145/3522591},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {In the era of big data, data are usually distributed
    across numerous connected computing and storage units
    (i.e., nodes or workers). Under such an environment,
    many machine learning problems can be reformulated as a
    consensus optimization problem, which \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Cai:2022:ANI,
  author = {Jianghui Cai and Yuqing Yang and Haifeng Yang and
    Xujun Zhao and Jing Hao},
  title = {{ARIS}: a Noise Insensitive Data Pre-Processing Scheme
    for Data Reduction Using Influence Space},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {110},
  pages = {110:1--110:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3522592},
  URL = {https://dl.acm.org/doi/10.1145/3522592},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {The extensive growth of data quantity has posed many
    challenges to data analysis and retrieval. Noise and
    redundancy are typical representatives of the
    above-mentioned challenges, which may reduce the
    reliability of analysis and retrieval results and
    \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Zhang:2022:IBL,
  author = {Xiaoying Zhang and Hong Xie and John C. S. Lui},
  title = {Improving Bandit Learning Via Heterogeneous
    Information Networks: Algorithms and Applications},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {111},
  pages = {111:1--111:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3522590},
  URL = {https://dl.acm.org/doi/10.1145/3522590},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Contextual bandit serves as an invaluable tool to
    balance the exploration vs. exploitation tradeoff in
    various applications such as online recommendation. In
    many applications, heterogeneous information networks
    (HINs) provide rich side information for \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Desantis:2022:FBM,
  author =       "Derek DeSantis and Erik Skau and Duc P. Truong and
                 Boian Alexandrov",
  title =        "Factorization of Binary Matrices: Rank Relations,
                 Uniqueness and Model Selection of {Boolean}
                 Decomposition",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "112:1--112:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522594",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522594",
  abstract =     "The application of binary matrices are numerous.
                 Representing a matrix as a mixture of a small
                 collection of latent vectors via low-rank decomposition
                 is often seen as an advantageous method to interpret
                 and analyze data. In this work, we examine the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "112",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2022:SWT,
  author = {Xing Yang and Chen Zhang and Baihua Zheng},
  title = {Segment-Wise Time-Varying Dynamic {Bayesian} Network
    with Graph Regularization},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {113},
  pages = {113:1--113:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3522589},
  URL = {https://dl.acm.org/doi/10.1145/3522589},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Time-varying dynamic Bayesian network (TVDBN) is
    essential for describing time-evolving directed
    conditional dependence structures in complex
    multivariate systems. In this article, we construct a
    TVDBN model, together with a score-based method for its
    \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Wang:2022:CSD,
  author = {Shaowei Wang and Lingling Zhang and Xuan Luo and Yi
    Yang and Xin Hu and Tao Qin and Jun Liu},
  title = {Computer Science Diagram Understanding with Topology
    Parsing},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {114},
  pages = {114:1--114:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3522689},
  URL = {https://dl.acm.org/doi/10.1145/3522689},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Diagram is a special form of visual expression for
    representing complex concepts, logic, and knowledge,
    which widely appears in educational scenes such as
    textbooks, blogs, and encyclopedias. Current research
    on diagrams preliminarily focuses on natural \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Jang:2022:FKS,
  author =       "Jun-Gi Jang and Chaeheum Park and Changwon Jang and
                 Geonsoo Kim and U Kang",
  title =        "Finding Key Structures in {MMORPG} Graph with
                 Hierarchical Graph Summarization",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "115:1--115:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3522691",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3522691",
  abstract =     "What are the key structures existing in a large
                 real-world MMORPG (Massively Multiplayer Online
                 Role-Playing Game) graph? How can we compactly
                 summarize an MMORPG graph with hierarchical node
                 labels, considering substructures at different levels
                 of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "115",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhu:2022:PPN,
  author = {Nengjun Zhu and Jian Cao and Xinjiang Lu and Chuanren
    Liu and Hao Liu and Yanyan Li and Xiangfeng Luo and Hui
    Xiong},
  title = {Predicting a Person's Next Activity Region with a
    Dynamic Region-Relation-Aware Graph Neural Network},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {116},
  pages = {116:1--116:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3529091},
  URL = {https://dl.acm.org/doi/10.1145/3529091},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {The understanding of people's inter-regional mobility
    behaviors, such as predicting the next activity region
    (AR) or uncovering the intentions for regional
    mobility, is of great value to public administration or
    business interests. While there are \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Xiong:2022:GDL,
  author = {Haoyi Xiong and Ruosi Wan and Jian Zhao and Zeyu Chen
    and Xingjian Li and Zhanxing Zhu and Jun Huan},
  title = {{GrOD}: Deep Learning with Gradients Orthogonal
    Decomposition for Knowledge Transfer, Distillation, and
    Adversarial Training},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {117},
  pages = {117:1--117:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3530836},
  URL = {https://dl.acm.org/doi/10.1145/3530836},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Regularization that incorporates the linear
    combination of empirical loss and explicit
    regularization terms as the loss function has been
    frequently used for many machine learning tasks. The
    explicit regularization term is designed in different
    types, \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Benarous:2022:SLH,
  author =       "Maya Benarous and Eran Toch and Irad Ben-Gal",
  title =        "Synthesis of Longitudinal Human Location Sequences:
                 Balancing Utility and Privacy",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "118:1--118:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3529260",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3529260",
  abstract =     "People's location data are continuously tracked from
                 various devices and sensors, enabling an ongoing
                 analysis of sensitive information that can violate
                 people's privacy and reveal confidential information.
                 Synthetic data have been used to generate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "118",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Coscia:2022:GEM,
  author =       "Michele Coscia",
  title =        "Generalized {Euclidean} Measure to Estimate Distances
                 on Multilayer Networks",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "119:1--119:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3529396",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3529396",
  abstract =     "Estimating the distance covered by a spreading event
                 on a network can lead to a better understanding of
                 epidemics, economic growth, and human behavior. There
                 are many methods solving this problem---which has been
                 called Node Vector Distance (NVD)---for single \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "119",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yildiz:2022:SRR,
  author = {Ilkay Yildiz and Jennifer Dy and Deniz Erdogmus and
    Susan Ostmo and J. Peter Campbell and Michael F. Chiang
    and Stratis Ioannidis},
  title = {Spectral Ranking Regression},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {120},
  pages = {120:1--120:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3530693},
  URL = {https://dl.acm.org/doi/10.1145/3530693},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {We study the problem of ranking regression, in which a
    dataset of rankings is used to learn Plackett-Luce
    scores as functions of sample features. We propose a
    novel spectral algorithm to accelerate learning in
    ranking regression. Our main technical \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Li:2022:PDB,
  author = {Ziyue Li and Hao Yan and Fugee Tsung and Ke Zhang},
  title = {Profile Decomposition Based Hybrid Transfer Learning
    for Cold-Start Data Anomaly Detection},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {121},
  pages = {121:1--121:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3530990},
  URL = {https://dl.acm.org/doi/10.1145/3530990},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Anomaly detection is an essential task for quality
    management in smart manufacturing. An accurate
    data-driven detection method usually needs enough data
    and labels. However, in practice, there commonly exist
    newly set-up processes in manufacturing, and \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Hu:2022:SDP,
  author = {Yue Hu and Ao Qu and Yanbing Wang and Daniel B. Work},
  title = {Streaming Data Preprocessing via Online Tensor
    Recovery for Large Environmental Sensor Networks},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {122},
  pages = {122:1--122:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3532189},
  URL = {https://dl.acm.org/doi/10.1145/3532189},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Measuring the built and natural environment at a
    fine-grained scale is now possible with low-cost urban
    environmental sensor networks. However, fine-grained
    city-scale data analysis is complicated by tedious data
    cleaning including removing outliers and \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Ma:2022:HFD,
  author = {Pengfei Ma and Youxi Wu and Yan Li and Lei Guo and He
    Jiang and Xingquan Zhu and Xindong Wu},
  title = {{HW-Forest}: Deep Forest with Hashing Screening and
    Window Screening},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {123},
  pages = {123:1--123:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3532193},
  URL = {https://dl.acm.org/doi/10.1145/3532193},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {As a novel deep learning model, gcForest has been
    widely used in various applications. However, current
    multi-grained scanning of gcForest produces many
    redundant feature vectors, and this increases the time
    cost of the model. To screen out redundant \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Pellegrina:2022:MMC,
  author =       "Leonardo Pellegrina and Cyrus Cousins and Fabio Vandin
                 and Matteo Riondato",
  title =        "{MCRapper}: {Monte-Carlo Rademacher} Averages for
                 Poset Families and Approximate Pattern Mining",
  journal =      j-TKDD,
  volume =       "16",
  number =       "6",
  pages =        "124:1--124:??",
  month =        dec,
  year =         "2022",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532187",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Wed Nov 16 08:20:02 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532187",
  abstract =     "``I'm an MC still as honest'' --- Eminem, Rap God. We
                 present MCRapper, an algorithm for efficient
                 computation of Monte-Carlo Empirical Rademacher
                 Averages (MCERA) for families of functions exhibiting
                 poset (e.g., lattice) structure, such as those that
                 arise in \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "124",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Bechini:2022:NBF,
  author = {Alessio Bechini and Alessandro Bondielli and Jos{\'e}
    Luis Corcuera B{\'a}rcena and Pietro Ducange and
    Francesco Marcelloni and Alessandro Renda},
  title = {A News-Based Framework for Uncovering and Tracking
    City Area Profiles: Assessment in {Covid-19} Setting},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {16},
  number = {6},
  articleno = {125},
  pages = {125:1--125:??},
  month = dec,
  year = {2022},
  DOI = {https://doi.org/10.1145/3532186},
  URL = {https://dl.acm.org/doi/10.1145/3532186},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {In the last years, there has been an ever-increasing
    interest in profiling various aspects of city life,
    especially in the context of smart cities. This
    interest has become even more relevant recently when we
    have realized how dramatic events, such as the
    \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Wed Nov 16 08:20:02 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Wang:2023:MCS,
  author = {Ting-Yun Wang and Chiao-Ting Chen and Ju-Chun Huang
    and Szu-Hao Huang},
  title = {Modeling Cross-session Information with Multi-interest
    Graph Neural Networks for the Next-item
    Recommendation},
  journal = j-TKDD,
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  ajournal = {ACM Trans. Knowl. Discov. Data},
  journal-URL = {https://dl.acm.org/loi/tkdd},
  volume = {17},
  number = {1},
  articleno = {1},
  pages = {1:1--1:??},
  month = jan,
  year = {2023},
  DOI = {https://doi.org/10.1145/3532192},
  URL = {https://dl.acm.org/doi/10.1145/3532192},
  CODEN = {????},
  ISSN = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L = {1556-4681},
  abstract = {Next-item recommendation involves predicting the next
    item of interest of a given user from their past
    behavior. Users tend to browse and purchase various
    items on e-commerce websites according to their varied
    interests and needs, as reflected in their \ldots{}},
  acknowledgement = ack-nhfb,
  bibdate = {Thu Mar 16 07:36:49 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib}
}

@Article{Wang:2023:SRM,
  author =       "Yu Wang and Chuan Chen and Jinrong Lai and Lele Fu and
                 Yuren Zhou and Zibin Zheng",
  title =        "A Self-Representation Method with Local Similarity
                 Preserving for Fast Multi-View Outlier Detection",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532191",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532191",
  abstract =     "With the rapidly growing attention to multi-view data
                 in recent years, multi-view outlier detection has
                 become a rising field with intense research. These
                 researches have made some success, but still exist some
                 issues that need to be solved. First, many \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:LAA,
  author =       "Ke Wang and Yanmin Zhu and Haobing Liu and Tianzi Zang
                 and Chunyang Wang",
  title =        "Learning Aspect-Aware High-Order Representations from
                 Ratings and Reviews for Recommendation",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532188",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532188",
  abstract =     "Textual reviews contain rich semantic information that
                 is useful for making better recommendation, as such
                 semantic information may indicate more fine-grained
                 preferences of users. Recent efforts make considerable
                 improvement on recommendation by \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2023:IGL,
  author =       "Zan Zhang and Lin Liu and Jiuyong Li and Xindong Wu",
  title =        "Integrating Global and Local Feature Selection for
                 Multi-Label Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532190",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532190",
  abstract =     "Multi-label learning deals with the problem where an
                 instance is associated with multiple labels
                 simultaneously. Multi-label data is often of high
                 dimensionality and has many noisy, irrelevant, and
                 redundant features. As an important machine learning
                 task, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2023:EAM,
  author =       "Xinye Chen and Stefan G{\"u}ttel",
  title =        "An Efficient Aggregation Method for the Symbolic
                 Representation of Temporal Data",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532622",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532622",
  abstract =     "Symbolic representations are a useful tool for the
                 dimension reduction of temporal data, allowing for the
                 efficient storage of and information retrieval from
                 time series. They can also enhance the training of
                 machine learning algorithms on time series \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2023:SSG,
  author =       "Lei Li and Mengjiao Yan and Zhenchao Tao and Huanhuan
                 Chen and Xindong Wu",
  title =        "Semi-Supervised Graph Pattern Matching and Rematching
                 for Expert Community Location",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532623",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532623",
  abstract =     "Graph pattern matching (GPM) is widely used in social
                 network analysis, such as expert finding, social group
                 query, and social position detection. Technically, GPM
                 is to find matched subgraphs that meet the requirements
                 of pattern graphs in big social \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Gu:2023:IMC,
  author =       "Zhibin Gu and Songhe Feng",
  title =        "Individuality Meets Commonality: a Unified Graph
                 Learning Framework for Multi-View Clustering",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532612",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532612",
  abstract =     "Multi-view clustering, which aims at boosting the
                 clustering performance by leveraging the individual
                 information and the common information of multi-view
                 data, has gained extensive consideration in recent
                 years. However, most existing multi-view \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chakraborty:2023:SAM,
  author =       "Roshni Chakraborty and Ritwika Das and Joydeep
                 Chandra",
  title =        "{SigGAN}: Adversarial Model for Learning Signed
                 Relationships in Networks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532610",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532610",
  abstract =     "Signed link prediction in graphs is an important
                 problem that has applications in diverse domains. It is
                 a binary classification problem that predicts whether
                 an edge between a pair of nodes is positive or
                 negative. Existing approaches for link prediction
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2023:DGC,
  author =       "Fuxian Li and Jie Feng and Huan Yan and Guangyin Jin
                 and Fan Yang and Funing Sun and Depeng Jin and Yong
                 Li",
  title =        "Dynamic Graph Convolutional Recurrent Network for
                 Traffic Prediction: Benchmark and Solution",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "9:1--9:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532611",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532611",
  abstract =     "Traffic prediction is the cornerstone of intelligent
                 transportation system. Accurate traffic forecasting is
                 essential for the applications of smart cities, i.e.,
                 intelligent traffic management and urban planning.
                 Although various methods are proposed for \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "9",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Huang:2023:URD,
  author =       "Gengsen Huang and Wensheng Gan and Jian Weng and
                 Philip S. Yu",
  title =        "{US-Rule}: Discovering Utility-driven Sequential
                 Rules",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "10:1--10:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532613",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532613",
  abstract =     "Utility-driven mining is an important task in data
                 science and has many applications in real life.
                 High-utility sequential pattern mining (HUSPM) is one
                 kind of utility-driven mining. It aims at discovering
                 all sequential patterns with high utility. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "10",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:MCR,
  author =       "Jiapu Wang and Boyue Wang and Junbin Gao and Yongli Hu
                 and Baocai Yin",
  title =        "Multi-Concept Representation Learning for Knowledge
                 Graph Completion",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "11:1--11:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3533017",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3533017",
  abstract =     "Knowledge Graph Completion (KGC) aims at inferring
                 missing entities or relations by embedding them in a
                 low-dimensional space. However, most existing KGC
                 methods generally fail to handle the complex concepts
                 hidden in triplets, so the learned embeddings
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "11",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Kirielle:2023:UGB,
  author =       "Nishadi Kirielle and Peter Christen and Thilina
                 Ranbaduge",
  title =        "Unsupervised Graph-Based Entity Resolution for Complex
                 Entities",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3533016",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3533016",
  abstract =     "Entity resolution (ER) is the process of linking
                 records that refer to the same entity. Traditionally,
                 this process compares attribute values of records to
                 calculate similarities and then classifies pairs of
                 records as referring to the same entity or not
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "12",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Kwon:2023:EBM,
  author =       "Soonki Kwon and Younghoon Lee",
  title =        "Explainability-Based Mix-Up Approach for Text Data
                 Augmentation",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "13:1--13:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3533048",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3533048",
  abstract =     "Text augmentation is a strategy for increasing the
                 diversity of training examples without explicitly
                 collecting new data. Owing to the efficiency and
                 effectiveness of text augmentation, numerous
                 augmentation methodologies have been proposed. Among
                 them, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "13",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2023:CBS,
  author =       "Qian Li and Xiangmeng Wang and Zhichao Wang and
                 Guandong Xu",
  title =        "Be Causal: De-Biasing Social Network Confounding in
                 Recommendation",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "14:1--14:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3533725",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3533725",
  abstract =     "In recommendation systems, the existence of the
                 missing-not-at-random (MNAR) problem results in the
                 selection bias issue, degrading the recommendation
                 performance ultimately. A common practice to address
                 MNAR is to treat missing entries from the so-called
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "14",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Luo:2023:SDH,
  author =       "Xiao Luo and Haixin Wang and Daqing Wu and Chong Chen
                 and Minghua Deng and Jianqiang Huang and Xian-Sheng
                 Hua",
  title =        "A Survey on Deep Hashing Methods",
  journal =      j-TKDD,
  volume =       "17",
  number =       "1",
  pages =        "15:1--15:??",
  month =        jan,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3532624",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Thu Mar 16 07:36:49 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/hash.bib;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3532624",
  abstract =     "Nearest neighbor search aims at obtaining the samples
                 in the database with the smallest distances from them
                 to the queries, which is a basic task in a range of
                 fields, including computer vision and data mining.
                 Hashing is one of the most widely used methods for its
                 computational and storage efficiency. With the
                 development of deep learning, deep hashing methods show
                 more advantages than traditional methods. In this
                 survey, we detailedly investigate current deep hashing
                 algorithms including deep supervised hashing and deep
                 unsupervised hashing. Specifically, we categorize deep
                 supervised hashing methods into pairwise methods,
                 ranking-based methods, pointwise methods as well as
                 quantization according to how measuring the
                 similarities of the learned hash codes. Moreover, deep
                 unsupervised hashing is categorized into similarity
                 reconstruction-based methods, pseudo-label-based
                 methods, and prediction-free self-supervised
                 learning-based methods based on their semantic learning
                 manners. We also introduce three related important
                 topics including semi-supervised deep hashing, domain
                 adaption deep hashing, and multi-modal deep hashing.
                 Meanwhile, we present some commonly used public
                 datasets and the scheme to measure the performance of
                 deep hashing algorithms. Finally, we discuss some
                 potential research directions in conclusion.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "15",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Nguyen:2023:DAG,
  author =       "Hung T. Nguyen and Pierre J. Liang and Leman Akoglu",
  title =        "Detecting Anomalous Graphs in Labeled Multi-Graph
                 Databases",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "16:1--16:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3533770",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3533770",
  abstract =     "Within a large database G containing graphs with
                 labeled nodes and directed, multi-edges; how can we
                 detect the anomalous graphs? Most existing work are
                 designed for plain (unlabeled) and/or simple
                 (unweighted) graphs. We introduce CODEtect, the first
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "16",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Lu:2023:SDI,
  author =       "Xun Lu and Songhe Feng",
  title =        "Structure Diversity-Induced Anchor Graph Fusion for
                 Multi-View Clustering",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "17:1--17:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3534931",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3534931",
  abstract =     "The anchor graph structure has been widely used to
                 speed up large-scale multi-view clustering and
                 exhibited promising performance. How to effectively
                 integrate the anchor graphs on multiple views to
                 achieve enhanced clustering performance still remains a
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "17",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:GML,
  author =       "Lichen Wang and Zhengming Ding and Kasey Lee and
                 Seungju Han and Jae-Joon Han and Changkyu Choi and Yun
                 Fu",
  title =        "Generative Multi-Label Correlation Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "18:1--18:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3538708",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3538708",
  abstract =     "In real-world applications, a single instance could
                 have more than one label. To solve this task,
                 multi-label learning methods emerged in recent years.
                 It is a more challenging problem for many reasons, such
                 as complex label correlation, long-tail label
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "18",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Lu:2023:DPE,
  author =       "Xun Lu and Songhe Feng and Gengyu Lyu and Yi Jin and
                 Congyan Lang",
  title =        "Distance-Preserving Embedding Adaptive Bipartite Graph
                 Multi-View Learning with Application to Multi-Label
                 Classification",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "19:1--19:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3537900",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3537900",
  abstract =     "Graph-based multi-view learning has attracted much
                 attention due to the efficacy of fusing the information
                 from different views. However, most of them exhibit
                 high computational complexity. We propose an
                 anchor-based bipartite graph embedding approach to
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "19",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:CUV,
  author =       "Qianru Wang and Bin Guo and Lu Cheng and Zhiwen Yu and
                 Huan Liu",
  title =        "{CausalSE}: Understanding Varied Spatial Effects with
                 Missing Data Toward Adding New Bike-sharing Stations",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "20:1--20:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3536427",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3536427",
  abstract =     "To meet the growing bike-sharing demands and make
                 people's travel convenient, the companies need to add
                 new stations at locations where demands exceed supply.
                 Before making reliable decisions on adding new
                 stations, it is required to understand the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "20",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jerez:2023:EAD,
  author =       "Carlos Ivan Jerez and Jun Zhang and Marcia R. Silva",
  title =        "On Equivalence of Anomaly Detection Algorithms",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "21:1--21:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3536428",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3536428",
  abstract =     "In most domains, anomaly detection is typically cast
                 as an unsupervised learning problem because of the
                 infeasibility of labeling large datasets. In this
                 setup, the evaluation and comparison of different
                 anomaly detection algorithms is difficult. Although
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "21",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Freris:2023:IEV,
  author =       "Nikolaos M. Freris and Ahmad Ajalloeian and Michalis
                 Vlachos",
  title =        "Interpretable Embedding and Visualization of
                 Compressed Data",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "22:1--22:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3537901",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3537901",
  abstract =     "Traditional embedding methodologies, also known as
                 dimensionality reduction techniques, assume the
                 availability of exact pairwise distances between the
                 high-dimensional objects that will be embedded in a
                 lower dimensionality. In this article, we propose
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "22",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:MIF,
  author =       "Shaokang Wang and Li Pan and Yu Wu",
  title =        "Meta-Information Fusion of Hierarchical Semantics
                 Dependency and Graph Structure for Structured Text
                 Classification",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "23:1--23:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3537971",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3537971",
  abstract =     "Structured text with plentiful hierarchical structure
                 information is an important part in real-world complex
                 texts. Structured text classification is attracting
                 more attention in natural language processing due to
                 the increasing complexity of application \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "23",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhu:2023:NGI,
  author =       "Xuliang Zhu and Xin Huang and Longxu Sun and Jiming
                 Liu",
  title =        "A Novel Graph Indexing Approach for Uncovering
                 Potential {COVID-19} Transmission Clusters",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "24:1--24:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3538492",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3538492",
  abstract =     "The COVID-19 pandemic has caused the society lockdowns
                 and a large number of deaths in many countries.
                 Potential transmission cluster discovery is to find all
                 suspected users with infections, which is greatly
                 needed to fast discover virus transmission \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "24",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Atyabi:2023:SCA,
  author =       "Adham Atyabi and Frederick Shic and Jiajun Jiang and
                 Claire E. Foster and Erin Barney and Minah Kim and
                 Beibin Li and Pamela Ventola and Chung Hao Chen",
  title =        "Stratification of Children with Autism Spectrum
                 Disorder Through Fusion of Temporal Information in
                 Eye-gaze Scan-Paths",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "25:1--25:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3539226",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3539226",
  abstract =     "Background: Looking pattern differences are shown to
                 separate individuals with Autism Spectrum Disorder
                 (ASD) and Typically Developing (TD) controls. Recent
                 studies have shown that, in children with ASD, these
                 patterns change with intellectual and social \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "25",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2023:GDF,
  author =       "Hongjie Chen and Ryan A. Rossi and Kanak Mahadik and
                 Sungchul Kim and Hoda Eldardiry",
  title =        "Graph Deep Factors for Probabilistic Time-series
                 Forecasting",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "26:1--26:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3543511",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3543511",
  abstract =     "Effective time-series forecasting methods are of
                 significant importance to solve a broad spectrum of
                 research problems. Deep probabilistic forecasting
                 techniques have recently been proposed for modeling
                 large collections of time-series. However, these
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "26",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jha:2023:SCL,
  author =       "Akshita Jha and Vineeth Rakesh and Jaideep
                 Chandrashekar and Adithya Samavedhi and Chandan K.
                 Reddy",
  title =        "Supervised Contrastive Learning for Interpretable
                 Long-Form Document Matching",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "27:1--27:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3542822",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3542822",
  abstract =     "Recent advancements in deep learning techniques have
                 transformed the area of semantic text matching (STM).
                 However, most state-of-the-art models are designed to
                 operate with short documents such as tweets, user
                 reviews, comments, and so on. These models \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "27",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Vajiac:2023:DIT,
  author =       "Catalina Vajiac and Meng-Chieh Lee and Aayushi
                 Kulshrestha and Sacha Levy and Namyong Park and Andreas
                 Olligschlaeger and Cara Jones and Reihaneh Rabbany and
                 Christos Faloutsos",
  title =        "{DeltaShield}: Information Theory for
                 Human-Trafficking Detection",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "28:1--28:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3563040",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563040",
  abstract =     "Given a million escort advertisements, how can we spot
                 near-duplicates? Such micro-clusters of ads are usually
                 signals of human trafficking (HT). How can we summarize
                 them to convince law enforcement to act? Spotting
                 micro-clusters of near-duplicate \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "28",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Sun:2023:SHI,
  author =       "Jianhui Sun and Ying Yang and Guangxu Xun and Aidong
                 Zhang",
  title =        "Scheduling Hyperparameters to Improve Generalization:
                 From Centralized {SGD} to Asynchronous {SGD}",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "29:1--29:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544782",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544782",
  abstract =     "This article studies how to schedule
                 hyperparameters to improve generalization of both
                 centralized single-machine stochastic gradient descent
                 (SGD) and distributed asynchronous SGD (ASGD). SGD
                 augmented with momentum variants (e.g., heavy ball
                 momentum (\ldots{}))",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "29",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Miao:2023:DPC,
  author =       "Xiaoye Miao and Huanhuan Peng and Yunjun Gao and
                 Zongfu Zhang and Jianwei Yin",
  title =        "On Dynamically Pricing Crowdsourcing Tasks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "2",
  pages =        "30:1--30:??",
  month =        feb,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544018",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:44 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544018",
  abstract =     "Crowdsourcing techniques have been extensively
                 explored in the past decade, including task allocation,
                 quality assessment, and so on. Most of professional
                 crowdsourcing platforms adopt the fixed pricing scheme
                 to offer a fixed price for crowd tasks. It is
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "30",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Kamhoua:2023:GGG,
  author =       "Barakeel Fanseu Kamhoua and Lin Zhang and Kaili Ma and
                 James Cheng and Bo Li and Bo Han",
  title =        "{GRACE}: a General Graph Convolution Framework for
                 Attributed Graph Clustering",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "31:1--31:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544977",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544977",
  abstract =     "Attributed graph clustering (AGC) is an important
                 problem in graph mining as more and more complex data
                 in real-world have been represented in graphs with
                 attributed nodes. While it is a common practice to
                 leverage both attribute and structure information
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "31",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhai:2023:LGR,
  author =       "Penglong Zhai and Shihua Zhang",
  title =        "Learnable Graph-Regularization for Matrix
                 Decomposition",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "32:1--32:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3544781",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3544781",
  abstract =     "Low-rank approximation models of data matrices have
                 become important machine learning and data mining tools
                 in many fields, including computer vision, text mining,
                 bioinformatics, and many others. They allow for
                 embedding high-dimensional data into low-dimensional
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "32",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2023:RLP,
  author =       "Jinwei Chen and Zefang Zong and Yunlin Zhuang and Huan
                 Yan and Depeng Jin and Yong Li",
  title =        "Reinforcement Learning for Practical Express Systems
                 with Mixed Deliveries and Pickups",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "33:1--33:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3546952",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3546952",
  abstract =     "In real-world express systems, couriers need to
                 satisfy not only the delivery demands but also the
                 pick-up demands of customers. Delivery and pickup tasks
                 are usually mixed together within integrated routing
                 plans. Such a mixed routing problem can be \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "33",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Feng:2023:CTE,
  author =       "Tao Feng and Sirui Song and Tong Xia and Yong Li",
  title =        "Contact Tracing and Epidemic Intervention via Deep
                 Reinforcement Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "34:1--34:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3546870",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3546870",
  abstract =     "The recent outbreak of COVID-19 poses a serious threat
                 to people's lives. Epidemic control strategies have
                 also caused damage to the economy by cutting off
                 humans' daily commute. In this article, we develop an
                 Individual-based Reinforcement Learning \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "34",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wan:2023:PMT,
  author =       "Mingyang Wan and Daochen Zha and Ninghao Liu and Na
                 Zou",
  title =        "In-Processing Modeling Techniques for Machine Learning
                 Fairness: a Survey",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "35:1--35:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3551390",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3551390",
  abstract =     "Machine learning models are becoming pervasive in
                 high-stakes applications. Despite their clear benefits
                 in terms of performance, the models could show
                 discrimination against minority groups and result in
                 fairness issues in a decision-making process,
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "35",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Long:2023:MAC,
  author =       "Qiang Long and Adil Bagirov and Sona Taheri and Nargiz
                 Sultanova and Xue Wu",
  title =        "Methods and Applications of Clusterwise Linear
                 Regression: a Survey and Comparison",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "36:1--36:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3550074",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3550074",
  abstract =     "Clusterwise linear regression (CLR) is a well-known
                 technique for approximating a data using more than one
                 linear function. It is based on the combination of
                 clustering and multiple linear regression methods. This
                 article provides a comprehensive survey \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "36",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wu:2023:OMO,
  author =       "Youxi Wu and Mingjie Chen and Yan Li and Jing Liu and
                 Zhao Li and Jinyan Li and Xindong Wu",
  title =        "{ONP-Miner}: One-off Negative Sequential Pattern
                 Mining",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "37:1--37:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3549940",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3549940",
  abstract =     "Negative sequential pattern mining (SPM) is an
                 important SPM research topic. Unlike positive SPM,
                 negative SPM can discover events that should have
                 occurred but have not occurred, and it can be used for
                 financial risk management and fraud detection.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "37",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Carchiolo:2023:ENP,
  author =       "Vincenza Carchiolo and Marco Grassia and Alessandro
                 Longheu and Michele Malgeri and Giuseppe Mangioni",
  title =        "Efficient Node {PageRank} Improvement via
                 Link-building using Geometric Deep Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "38:1--38:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3551642",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pagerank.bib;
                 http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3551642",
  abstract =     "Centrality is a relevant topic in the field of network
                 research, due to its various theoretical and practical
                 implications. In general, all centrality metrics aim at
                 measuring the importance of nodes (according to some
                 definition of importance), and such importance scores
                 are used to rank the nodes in the network, therefore
                 the rank improvement is a strictly related topic. In a
                 given network, the rank improvement is achieved by
                 establishing new links, therefore the question shifts
                 to which and how many links should be collected to get
                 a desired rank. This problem, also known as
                 link-building has been shown to be NP-hard, and most
                 heuristics developed failed in obtaining good
                 performance with acceptable computational complexity.
                 In this article, we present LB--GDM, a novel approach
                 that leverages Geometric Deep Learning to tackle the
                 link-building problem. To validate our proposal, 31
                 real-world networks were considered; tests show that
                 LB--GDM performs significantly better than the
                 state-of-the-art heuristics, while having a comparable
                 or even lower computational complexity, which allows it
                 to scale well even to large networks. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "38",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jiang:2023:LLM,
  author =       "Linli Jiang and Chao-Xiong Chen and Chao Chen",
  title =        "{L2MM}: Learning to Map Matching with Deep Models for
                 Low-Quality {GPS} Trajectory Data",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "39:1--39:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3550486",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3550486",
  abstract =     "Map matching is a fundamental research topic with the
                 objective of aligning GPS trajectories to paths on the
                 road network. However, existing models fail to achieve
                 satisfactory performance for low-quality (i.e., noisy,
                 low-frequency, and non-uniform) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "39",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2023:EIS,
  author =       "Yihong Zhang and Takahiro Hara",
  title =        "Explainable Integration of Social Media Background in
                 a Dynamic Neural Recommender",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "40:1--40:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3550279",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3550279",
  abstract =     "Recommender systems nowadays are commonly deployed in
                 e-commerce platforms to help customers making purchase
                 decisions. Dynamic recommender considers not only
                 static user-item interaction data, but the temporal
                 information at the time of recommendation. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "40",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:MRB,
  author =       "Yashen Wang and Zhaoyu Wang and Huanhuan Zhang and
                 Zhirun Liu",
  title =        "Microblog Retrieval Based on Concept-Enhanced
                 Pre-Training Model",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "41:1--41:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3552311",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3552311",
  abstract =     "Despite substantial interest in applications of neural
                 networks to information retrieval, neural ranking
                 models have mostly been applied to conventional ad-hoc
                 retrieval tasks over web pages and newswire articles.
                 This article proposes a concept-enhanced \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "41",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wei:2023:DSB,
  author =       "Xuemei Wei and Yezheng Liu and Jianshan Sun and
                 Yuanchun Jiang and Qifeng Tang and Kun Yuan",
  title =        "Dual Subgraph-Based Graph Neural Network for
                 Friendship Prediction in Location-Based Social
                 Networks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "42:1--42:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3554981",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3554981",
  abstract =     "With the wide use of Location-Based Social Networks
                 (LBSNs), predicting user friendship from online social
                 relations and offline trajectory data is of great value
                 to improve the platform service quality and user
                 satisfaction. Existing methods mainly focus \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "42",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jiang:2023:DTL,
  author =       "Xin Jiang and Zhengxin Yu and Chao Hai and Hongbo Liu
                 and Xindong Wu and Tomas Ward",
  title =        "{DNformer}: Temporal Link Prediction with Transfer
                 Learning in Dynamic Networks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "43:1--43:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3551892",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3551892",
  abstract =     "Temporal link prediction (TLP) is among the most
                 important graph learning tasks, capable of predicting
                 dynamic, time-varying links within networks. The key
                 problem of TLP is how to explore potential
                 link-evolving tendency from the increasing number of
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "43",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Aleryani:2023:MIE,
  author =       "Aliya Aleryani and Aaron Bostrom and Wenjia Wang and
                 Beatriz de la Iglesia",
  title =        "Multiple Imputation Ensembles for Time Series
                 ({MIE-TS})",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "44:1--44:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3551643",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3551643",
  abstract =     "Time series classification has become an interesting
                 field of research, thanks to the extensive studies
                 conducted in the past two decades. Time series may have
                 missing data, which may affect both the representation
                 and also modeling of time series. Thus, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "44",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2023:VGA,
  author =       "Dongjie Li and Dong Li and Guang Lian",
  title =        "Variational Graph Autoencoder with Adversarial Mutual
                 Information Learning for Network Representation
                 Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "3",
  pages =        "45:1--45:??",
  month =        apr,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3555809",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Fri Mar 31 09:53:45 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3555809",
  abstract =     "With the success of Graph Neural Network (GNN) in
                 network data, some GNN-based representation learning
                 methods for networks have emerged recently. Variational
                 Graph Autoencoder (VGAE) is a basic GNN framework for
                 network representation. Its purpose is to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "45",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wu:2023:CTIa,
  author =       "Gongqing Wu and Liangzhu Zhou and Jiazhu Xia and Lei
                 Li and Xianyu Bao and Xindong Wu",
  title =        "Crowdsourcing Truth Inference Based on Label
                 Confidence Clustering",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "46:1--46:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3556545",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3556545",
  abstract =     "Truth inference can help solve some difficult problems
                 of data integration in crowdsourcing. Crowdsourced
                 workers are not experts and their labeling ability
                 varies greatly; therefore, in practical applications,
                 it is difficult to determine whether the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "46",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Sehnan:2023:DSI,
  author =       "Dhruv Sehnan and Vasu Goel and Sarah Masud and Chhavi
                 Jain and Vikram Goyal and Tanmoy Chakraborty",
  title =        "{DiVA}: a Scalable, Interactive and Customizable
                 Visual Analytics Platform for Information Diffusion on
                 Large Networks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "47:1--47:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3558771",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3558771",
  abstract =     "With an increasing outreach of digital platforms in
                 our lives, researchers have taken a keen interest in
                 studying different facets of social interactions.
                 Analyzing the spread of information (aka diffusion)
                 has brought forth multiple research areas such
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "47",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% NOTE: The first author's family name is the single letter ``E''
%%% (compare the citation label), not an abbreviated initial, so it
%%% carries no trailing period.
@Article{E:2023:CEC,
  author =       "Jinlong E and Mo Li and Jianqiang Huang",
  title =        "{CrowdAtlas}: Estimating Crowd Distribution within the
                 Urban Rail Transit System",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "48:1--48:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3558521",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3558521",
  abstract =     "While urban rail transit systems are playing an
                 increasingly important role in meeting the
                 transportation demands of people, precise awareness of
                 how the human crowd is distributed within such a system
                 is highly necessary, which serves a range of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "48",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Yang:2023:PFL,
  author = {Lei Yang and Jiaming Huang and Wanyu Lin and Jiannong Cao},
  title = {Personalized Federated Learning on {Non-IID} Data via
    Group-based Meta-learning},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {49:1--49:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3558005},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3558005},
  abstract = {Personalized federated learning (PFL) has emerged as a
    paradigm to provide a personalized model that can fit the local data
    distribution of each client. One natural choice for PFL is to leverage
    the fast adaptation capability of meta-learning, where it \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {49},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Hermanns:2023:GSG,
  author = {Judith Hermanns and Konstantinos Skitsas and Anton Tsitsulin
    and Marina Munkhoeva and Alexander Kyster and Simon Nielsen and
    Alexander M. Bronstein and Davide Mottin and Panagiotis Karras},
  title = {{GRASP}: Scalable Graph Alignment by Spectral Corresponding
    Functions},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {50:1--50:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3561058},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3561058},
  abstract = {What is the best way to match the nodes of two graphs? This
    graph alignment problem generalizes graph isomorphism and arises in
    applications from social network analysis to bioinformatics. Some
    solutions assume that auxiliary information on known matches \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {50},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Nakajima:2023:RWS,
  author = {Kazuki Nakajima and Kazuyuki Shudo},
  title = {Random Walk Sampling in Social Networks Involving Private
    Nodes},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {51:1--51:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3561388},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3561388},
  abstract = {Analysis of social networks with limited data access is
    challenging for third parties. To address this challenge, a number of
    studies have developed algorithms that estimate properties of social
    networks via a simple random walk. However, most existing \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {51},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@Article{Shui:2023:LOL,
  author =       "Changjian Shui and William Wang and Ihsen Hedhli and
                 Chi Man Wong and Feng Wan and Boyu Wang and Christian
                 Gagn{\'e}",
  title =        "Lifelong Online Learning from Accumulated Knowledge",
  journal =      j-TKDD,
  volume =       "17",
  number =       "4",
  pages =        "52:1--52:??",
  month =        may,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3563947",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:29:25 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3563947",
  abstract =     "In this article, we formulate lifelong learning as an
                 online transfer learning procedure over consecutive
                 tasks, where learning a given task depends on the
                 accumulated knowledge. We propose a novel theoretical
                 principled framework, lifelong online \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "52",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Dai:2023:DMV,
  author = {Shaojie Dai and Jinshuai Wang and Chao Huang and Yanwei Yu and
    Junyu Dong},
  title = {Dynamic Multi-View Graph Neural Networks for Citywide Traffic
    Inference},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {53:1--53:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3564754},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3564754},
  abstract = {Accurate citywide traffic inference is critical for improving
    intelligent transportation systems with smart city applications.
    However, this task is very challenging given the limited training
    data, due to the high cost of sensor installment and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {53},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Ling:2023:STD,
  author = {Shuai Ling and Zhe Yu and Shaosheng Cao and Haipeng Zhang and
    Simon Hu},
  title = {{STHAN}: Transportation Demand Forecasting with Compound
    Spatio-Temporal Relationships},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {54:1--54:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3565578},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3565578},
  abstract = {Transportation demand forecasting is a critical precondition
    of optimal online transportation dispatch, which will greatly reduce
    drivers' wasted mileage and customers' waiting time, contributing to
    economic and environmental sustainability. Though \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {54},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Liu:2023:MMI,
  author = {Jiaying Liu and Feng Xia and Jing Ren and Bo Xu and Guansong
    Pang and Lianhua Chi},
  title = {{MIRROR}: Mining Implicit Relationships via Structure-Enhanced
    Graph Convolutional Networks},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {55:1--55:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3564531},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3564531},
  abstract = {Data explosion in the information society drives people to
    develop more effective ways to extract meaningful information.
    Extracting semantic information and relational information has emerged
    as a key mining primitive in a wide variety of practical \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {55},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Liu:2023:TTA,
  author = {Zhi Liu and Yang Chen and Feng Xia and Jixin Bian and Bing Zhu
    and Guojiang Shen and Xiangjie Kong},
  title = {{TAP}: Traffic Accident Profiling via Multi-Task
    Spatio-Temporal Graph Representation Learning},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {56:1--56:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3564594},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3564594},
  abstract = {Predicting traffic accidents can help traffic management
    departments respond to sudden traffic situations promptly, improve
    drivers' vigilance, and reduce losses caused by traffic accidents.
    However, the causality of traffic accidents is complex and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {56},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Chen:2023:TRR,
  author = {Lei Chen and Jie Cao and Haicheng Tao and Jia Wu},
  title = {Trip Reinforcement Recommendation with Graph-based
    Representation Learning},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {57:1--57:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3564609},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3564609},
  abstract = {Tourism is an important industry and a popular leisure
    activity involving billions of tourists per annum. One challenging
    problem tourists face is identifying attractive Places-of-Interest
    (POIs) and planning the personalized trip with time constraints.
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {57},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Chen:2023:LEL,
  author = {Huiming Chen and Huandong Wang and Quanming Yao and Yong Li and
    Depeng Jin and Qiang Yang},
  title = {{LoSAC}: an Efficient Local Stochastic Average Control Method
    for Federated Optimization},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {58:1--58:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3566128},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3566128},
  abstract = {Federated optimization (FedOpt), which targets at
    collaboratively training a learning model across a large number of
    distributed clients, is vital for federated learning. The primary
    concerns in FedOpt can be attributed to the model divergence and
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {58},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Jing:2023:LSR,
  author = {Mengyuan Jing and Yanmin Zhu and Yanan Xu and Haobing Liu and
    Tianzi Zang and Chunyang Wang and Jiadi Yu},
  title = {Learning Shared Representations for Recommendation with Dynamic
    Heterogeneous Graph Convolutional Networks},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {59:1--59:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3565575},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3565575},
  abstract = {Graph Convolutional Networks (GCNs) have been widely used for
    collaborative filtering, due to their effectiveness in exploiting
    high-order collaborative signals. However, two issues have not been
    well addressed by existing studies. First, usually only one \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {59},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Wang:2023:SSD,
  author = {Yizong Wang and Dong Zhao and Yajie Ren and Desheng Zhang and
    Huadong Ma},
  title = {{SPAP}: Simultaneous Demand Prediction and Planning for
    Electric Vehicle Chargers in a New City},
  journal = j-TKDD,
  volume = {17},
  number = {4},
  pages = {60:1--60:??},
  month = may,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3565577},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:29:25 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3565577},
  abstract = {For a new city that is committed to promoting Electric
    Vehicles (EVs), it is significant to plan the public charging
    infrastructure where charging demands are high. However, it is
    difficult to predict charging demands before the actual deployment of
    EV \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {60},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Lin:2023:ESM,
  author = {Dandan Lin and Victor Junqiu Wei and Raymond Chi-Wing Wong},
  title = {Effective and Scalable Manifold Ranking-Based Image Retrieval
    with Output Bound},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {61:1--61:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3565574},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3565574},
  abstract = {Image retrieval keeps attracting a lot of attention from both
    academic and industry over past years due to its variety of useful
    applications. Due to the rapid growth of deep learning approaches,
    more better feature vectors of images could be discovered \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {61},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Zhou:2023:SPA,
  author = {Peng Zhou and Xinwang Liu and Liang Du and Xuejun Li},
  title = {Self-paced Adaptive Bipartite Graph Learning for Consensus
    Clustering},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {62:1--62:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3564701},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3564701},
  abstract = {Consensus clustering provides an elegant framework to
    aggregate multiple weak clustering results to learn a consensus one
    that is more robust and stable than a single result. However, most of
    the existing methods usually use all data for consensus \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {62},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Li:2023:HTN,
  author = {Mengran Li and Yong Zhang and Xiaoyong Li and Yuchen Zhang and
    Baocai Yin},
  title = {Hypergraph Transformer Neural Networks},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {63:1--63:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3565028},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3565028},
  abstract = {Graph neural networks (GNNs) have been widely used for graph
    structure learning and achieved excellent performance in tasks such as
    node classification and link prediction. Real-world graph networks
    imply complex and various semantic information and are \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {63},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Li:2023:TFF,
  author = {Haoran Li and Zhiqiang Lv and Jianbo Li and Zhihao Xu and Yue
    Wang and Haokai Sun and Zhaoyu Sheng},
  title = {Traffic Flow Forecasting in the {COVID-19}: a Deep
    Spatial-temporal Model Based on Discrete Wavelet Transformation},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {64:1--64:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3564753},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3564753},
  abstract = {Traffic flow prediction has always been the focus of research
    in the field of Intelligent Transportation Systems, which is conducive
    to the more reasonable allocation of basic transportation resources
    and formulation of transportation policies. The spread \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {64},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Wu:2023:CTIb,
  author = {Gongqing Wu and Xingrui Zhuo and Xianyu Bao and Xuegang Hu and
    Richang Hong and Xindong Wu},
  title = {Crowdsourcing Truth Inference via Reliability-Driven Multi-View
    Graph Embedding},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {65:1--65:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3565576},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3565576},
  abstract = {Crowdsourcing truth inference aims to assign a correct answer
    to each task from candidate answers that are provided by crowdsourced
    workers. A common approach is to generate workers' reliabilities to
    represent the quality of answers. Although crowdsourced \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {65},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

%%% NOTE(review): the second author's given name is believed to be the
%%% single letter ``U'' rather than an abbreviated initial, hence no
%%% period; confirm against the publisher's author listing.
@Article{Jang:2023:SST,
  author =       "Jun-Gi Jang and U Kang",
  title =        "Static and Streaming {Tucker} Decomposition for Dense
                 Tensors",
  journal =      j-TKDD,
  volume =       "17",
  number =       "5",
  pages =        "66:1--66:??",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3568682",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:47:58 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3568682",
  abstract =     "Given a dense tensor, how can we efficiently discover
                 hidden relations and patterns in static and online
                 streaming settings? Tucker decomposition is a
                 fundamental tool to analyze multidimensional arrays in
                 the form of tensors. However, existing Tucker
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "66",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@article{Wang:2023:ULH,
  author = {Meng Wang and Boyu Li and Kun He and John Hopcroft},
  title = {Uncovering the Local Hidden Community Structure in Social
    Networks},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {67:1--67:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3567597},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3567597},
  abstract = {Hidden community is a useful concept proposed recently for
    social network analysis. Hidden communities indicate some weak
    communities whose most members also belong to other stronger dominant
    communities. Dominant communities could form a layer that \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {67},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Liu:2023:CFU,
  author = {Hao Liu and Qingyu Guo and Hengshu Zhu and Yanjie Fu and Fuzhen
    Zhuang and Xiaojuan Ma and Hui Xiong},
  title = {Characterizing and Forecasting Urban Vibrancy Evolution: a
    Multi-View Graph Mining Perspective},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {68:1--68:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3568683},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3568683},
  abstract = {Urban vibrancy describes the prosperity, diversity, and
    accessibility of urban areas, which is vital to a city's
    socio-economic development and sustainability. While many efforts have
    been made for statically measuring and evaluating urban vibrancy,
    there \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {68},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Ren:2023:AMA,
  author = {Yuyang Ren and Haonan Zhang and Peng Yu and Luoyi Fu and Xinde
    Cao and Xinbing Wang and Guihai Chen and Fei Long and Chenghu Zhou},
  title = {{Ada-MIP}: Adaptive Self-supervised Graph Representation
    Learning via Mutual Information and Proximity Optimization},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {69:1--69:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3568165},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3568165},
  abstract = {Self-supervised graph-level representation learning has
    recently received considerable attention. Given varied input
    distributions, jointly learning graphs' unique and common features is
    vital to downstream tasks. Inspired by graph contrastive learning
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {69},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Gu:2023:OJU,
  author = {Zhibin Gu and Songhe Feng and Ruiting Hu and Gengyu Lyu},
  title = {{ONION}: Joint Unsupervised Feature Selection and Robust
    Subspace Extraction for Graph-based Multi-View Clustering},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {70:1--70:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3568684},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3568684},
  abstract = {Graph-based Multi-View Clustering (GMVC) has received
    extensive attention due to its ability to capture the neighborhood
    relationship among data points from diverse views. However, most
    existing approaches construct similarity graphs from the original
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {70},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@article{Zhang:2023:SGU,
  author = {Zhijie Zhang and Wenzhong Li and Wangxiang Ding and Linming
    Zhang and Qingning Lu and Peng Hu and Tong Gui and Sanglu Lu},
  title = {{STAD-GAN}: Unsupervised Anomaly Detection on Multivariate Time
    Series with Self-training Generative Adversarial Networks},
  journal = j-TKDD,
  volume = {17},
  number = {5},
  pages = {71:1--71:??},
  month = jun,
  year = {2023},
  coden = {????},
  doi = {https://doi.org/10.1145/3572780},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Sat Apr 8 07:47:58 MDT 2023},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  url = {https://dl.acm.org/doi/10.1145/3572780},
  abstract = {Anomaly detection on multivariate time series (MTS) is an
    important research topic in data mining, which has a wide range of
    applications in information technology, financial management,
    manufacturing system, and so on. However, the state-of-the-art
    \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal = {ACM Trans. Knowl. Discov. Data},
  articleno = {71},
  fjournal = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-url = {https://dl.acm.org/loi/tkdd},
}

@Article{Wu:2023:WEC,
  author =       "Hongxin Wu and Meng Han and Zhiqiang Chen and Muhang
                 Li and Xilong Zhang",
  title =        "A Weighted Ensemble Classification Algorithm Based on
                 Nearest Neighbors for Multi-Label Data Stream",
  journal =      j-TKDD,
  volume =       "17",
  number =       "5",
  pages =        "72:1--72:??",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3570960",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:47:58 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3570960",
  abstract =     "With the rapid development of data stream, multi-label
                 algorithms for mining dynamic data become more and more
                 important. At the same time, when data distribution
                 changes, concept drift will occur, which will make the
                 existing classification models lose \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "72",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:ASA,
  author =       "Chunnan Wang and Kaixin Zhang and Hongzhi Wang and
                 Bozhou Chen",
  title =        "{Auto-STGCN}: Autonomous Spatial-Temporal Graph
                 Convolutional Network Search",
  journal =      j-TKDD,
  volume =       "17",
  number =       "5",
  pages =        "73:1--73:??",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3571285",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:47:58 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3571285",
  abstract =     "In recent years, many spatial-temporal graph
                 convolutional network (STGCN) models are proposed to
                 deal with the spatial-temporal network data forecasting
                 problem. These STGCN models have their own advantages,
                 i.e., each of them puts forward many effective
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "73",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2023:SSS,
  author =       "Yufu Chen and Yanghui Rao and Shurui Chen and Zhiqi
                 Lei and Haoran Xie and Raymond Y. K. Lau and Jian Yin",
  title =        "Semi-Supervised Sentiment Classification and Emotion
                 Distribution Learning Across Domains",
  journal =      j-TKDD,
  volume =       "17",
  number =       "5",
  pages =        "74:1--74:??",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3571736",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:47:58 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3571736",
  abstract =     "In this study, sentiment classification and emotion
                 distribution learning across domains are both
                 formulated as a semi-supervised domain adaptation
                 problem, which utilizes a small amount of labeled
                 documents in the target domain for model training. By
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "74",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Tang:2023:DSB,
  author =       "Hui Tang and Xun Liang and Yuhui Guo and Xiangping
                 Zheng and Bo Wu and Sensen Zhang and Zhiying Li",
  title =        "Diffuse and Smooth: Beyond Truncated Receptive Field
                 for Scalable and Adaptive Graph Representation
                 Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "5",
  pages =        "75:1--75:??",
  month =        jun,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3572781",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Apr 8 07:47:58 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3572781",
  abstract =     "As the scope of receptive field and the depth of Graph
                 Neural Networks (GNNs) are two completely orthogonal
                 aspects for graph learning, existing GNNs often have
                 shallow layers with truncated-receptive field and far
                 from achieving satisfactory performance. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "75",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2023:PPP,
  author =       "Xiao Liu and Bonan Gao and Basem Suleiman and Han You
                 and Zisu Ma and Yu Liu and Ali Anaissi",
  title =        "Privacy-Preserving Personalized Fitness Recommender
                 System {P$^3$FitRec}: a Multi-level Deep Learning
                 Approach",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "76:1--76:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3572899",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3572899",
  abstract =     "Recommender systems have been successfully used in
                 many domains with the help of machine learning
                 algorithms. However, such applications tend to use
                 multi-dimensional user data, which has raised
                 widespread concerns about the breach of users' privacy.
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "76",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yang:2023:TCS,
  author =       "Jie Yang and Zhixiao Wang and Xiaobin Rui and Yahui
                 Chai and Philip S. Yu and Lichao Sun",
  title =        "Triadic Closure Sensitive Influence Maximization",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "77:1--77:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3573011",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3573011",
  abstract =     "The influence maximization problem aims at selecting
                 the k most influential nodes (i.e., seed nodes) from a
                 social network, where the nodes can maximize the number
                 of influenced nodes \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "77",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Cousins:2023:BBC,
  author =       "Cyrus Cousins and Chloe Wohlgemuth and Matteo
                 Riondato",
  title =        "{Bavarian}: Betweenness Centrality Approximation with
                 Variance-aware {Rademacher} Averages",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "78:1--78:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3577021",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3577021",
  abstract =     "``[A]llain Gersten, Hopfen, und Wasser'' --- 1516
                 Reinheitsgebot. We present Bavarian, a collection of
                 sampling-based algorithms for approximating the
                 Betweenness Centrality (BC) of all vertices in a graph.
                 Our algorithms use Monte-Carlo Empirical Rademacher
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "78",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jalali:2023:FIF,
  author =       "Zeinab S. Jalali and Qilan Chen and Shwetha M.
                 Srikanta and Weixiang Wang and Myunghwan Kim and Hema
                 Raghavan and Sucheta Soundarajan",
  title =        "Fairness of Information Flow in Social Networks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "79:1--79:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3578268",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3578268",
  abstract =     "Social networks form a major part of people's lives,
                 and individuals often make important life decisions
                 based on information that spreads through these
                 networks. For this reason, it is important to know
                 whether individuals from different protected \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "79",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2023:EEG,
  author =       "Yunyi Li and Yongjing Hao and Pengpeng Zhao and
                 Guanfeng Liu and Yanchi Liu and Victor S. Sheng and
                 Xiaofang Zhou",
  title =        "Edge-enhanced Global Disentangled Graph Neural Network
                 for Sequential Recommendation",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "80:1--80:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3577928",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3577928",
  abstract =     "Sequential recommendation has been a widely popular
                 topic of recommender systems. Existing works have
                 contributed to enhancing the prediction ability of
                 sequential recommendation systems based on various
                 methods, such as recurrent networks and self-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "80",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Feng:2023:HDP,
  author =       "Wenjie Feng and Shenghua Liu and Xueqi Cheng",
  title =        "Hierarchical Dense Pattern Detection in Tensors",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "81:1--81:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3577022",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3577022",
  abstract =     "Dense subtensor detection gains remarkable success in
                 spotting anomalies and fraudulent behaviors for
                 multi-aspect data (i.e., tensors), like in social media
                 and event streams. Existing methods detect the densest
                 subtensors flatly and separately, with the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "81",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Huang:2023:GCD,
  author =       "Jingmin Huang and Bowei Chen and Zhi Yan and Iadh
                 Ounis and Jun Wang",
  title =        "{GEO}: a Computational Design Framework for Automotive
                 Exterior Facelift",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "82:1--82:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3578521",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3578521",
  abstract =     "Exterior facelift has become an effective method for
                 automakers to boost the consumers' interest in an
                 existing car model before it is redesigned. To support
                 the automotive facelift design process, this study
                 develops a novel computational framework --- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "82",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Singh:2023:MSM,
  author =       "Karandeep Singh and Seungeon Lee and Giuseppe (Joe)
                 Labianca and Jesse Michael Fagan and Meeyoung Cha",
  title =        "Multi-Stage Machine Learning Model for Hierarchical
                 Tie Valence Prediction",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "83:1--83:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3579096",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3579096",
  abstract =     "Individuals interacting in organizational settings
                 involving varying levels of formal hierarchy naturally
                 form a complex network of social ties having different
                 tie valences (e.g., positive and negative connections).
                 Social ties critically affect \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "83",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Su:2023:NMF,
  author =       "Sixing Su and Jiewen Guan and Bilian Chen and Xin
                 Huang",
  title =        "Nonnegative Matrix Factorization Based on Node
                 Centrality for Community Detection",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "84:1--84:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3578520",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3578520",
  abstract =     "Community detection is an important topic in network
                 analysis, and recently many community detection methods
                 have been developed on top of the Nonnegative Matrix
                 Factorization (NMF) technique. Most NMF-based community
                 detection methods only utilize the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "84",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:EEA,
  author =       "Yuxiang Wang and Jun Liu and Xiaoliang Xu and Xiangyu
                 Ke and Tianxing Wu and Xiaoxuan Gou",
  title =        "Efficient and Effective Academic Expert Finding on
                 Heterogeneous Graphs through {$ (k, P) $}-Core based
                 Embedding",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "85:1--85:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3578365",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3578365",
  abstract =     "Expert finding is crucial for a wealth of applications
                 in both academia and industry. Given a user query and
                 trove of academic papers, expert finding aims at
                 retrieving the most relevant experts for the query,
                 from the academic papers. Existing studies \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "85",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:SUI,
  author =       "Yongjie Wang and Ke Wang and Cheng Long and Chunyan
                 Miao",
  title =        "Summarizing User-item Matrix By Group Utility
                 Maximization",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "86:1--86:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3578586",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3578586",
  abstract =     "A user-item utility matrix represents the utility (or
                 preference) associated with each (user, item) pair,
                 such as citation counts, rating/vote on items or
                 locations, and clicks on items. A high utility value
                 indicates a strong association of the pair. In
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "86",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ni:2023:MBP,
  author =       "Peikun Ni and Jianming Zhu and Guoqing Wang",
  title =        "Misinformation Blocking Problem in Virtual and Real
                 Interconversion Social Networks",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "87:1--87:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3578936",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3578936",
  abstract =     "With the in-depth development of intelligent media
                 technology, online and offline fusion, reality and
                 virtual entanglement, information content
                 generalization, the boundary between positive and
                 negative information is blurred, all kinds of
                 misinformation \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "87",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ren:2023:SFB,
  author =       "Jinjun Ren and Yuping Wang and Xiyan Deng",
  title =        "Slack-Factor-Based Fuzzy Support Vector Machine for
                 Class Imbalance Problems",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "88:1--88:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3579050",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3579050",
  abstract =     "Class imbalance and noisy data widely exist in
                 real-world problems, and the support vector machine
                 (SVM) is hard to construct good classifiers on these
                 data. Fuzzy SVMs (FSVMs), as variants of SVM, use a
                 fuzzy membership function both to reflect the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "88",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2023:TWP,
  author =       "Lei Li and Zhiyuan Liu and Zan Zhang and Huanhuan Chen
                 and Xindong Wu",
  title =        "Three-way Preference Completion via Preference Graph",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "89:1--89:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3580368",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3580368",
  abstract =     "With the personal partial rankings from agents over a
                 subset of alternatives, the goal of preference
                 completion is to infer the agent's personalized
                 preference over all alternatives including those the
                 agent has not yet handled from uncertain preference
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "89",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Khokhar:2023:DPR,
  author =       "Rashid Hussain Khokhar and Benjamin C. M. Fung and
                 Farkhund Iqbal and Khalil Al-Hussaeni and Mohammed
                 Hussain",
  title =        "Differentially Private Release of Heterogeneous
                 Network for Managing Healthcare Data",
  journal =      j-TKDD,
  volume =       "17",
  number =       "6",
  pages =        "90:1--90:??",
  month =        jul,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3580367",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Apr 17 11:51:51 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3580367",
  abstract =     "With the increasing adoption of digital health
                 platforms through mobile apps and online services,
                 people have greater flexibility connecting with medical
                 practitioners, pharmacists, and laboratories and
                 accessing resources to manage their own health-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "90",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2023:RCM,
  author =       "Mimi Zhang and Andrew Parnell",
  title =        "Review of Clustering Methods for Functional Data",
  journal =      j-TKDD,
  volume =       "17",
  number =       "7",
  pages =        "91:1--91:??",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3581789",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 3 07:15:55 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3581789",
  abstract =     "Functional data clustering is to identify
                 heterogeneous morphological patterns in the continuous
                 functions underlying the discrete
                 measurements/observations. Application of functional
                 data clustering has appeared in many publications
                 across various fields \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "91",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2023:MLV,
  author =       "Ling Chen and Dandan Lyu and Shanshan Yu and Gencai
                 Chen",
  title =        "Multi-Level Visual Similarity Based Personalized
                 Tourist Attraction Recommendation Using Geo-Tagged
                 Photos",
  journal =      j-TKDD,
  volume =       "17",
  number =       "7",
  pages =        "92:1--92:??",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3582015",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 3 07:15:55 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582015",
  abstract =     "Geo-tagged photo-based tourist attraction
                 recommendation can discover users' travel preferences
                 from their taken photos, so as to recommend suitable
                 tourist attractions to them. However, existing visual
                 content-based methods cannot fully exploit the user
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "92",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xu:2023:OSF,
  author =       "Wanyue Xu and Zhongzhi Zhang",
  title =        "Optimal Scale-Free Small-World Graphs with Minimum
                 Scaling of Cover Time",
  journal =      j-TKDD,
  volume =       "17",
  number =       "7",
  pages =        "93:1--93:??",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3583691",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 3 07:15:55 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3583691",
  abstract =     "The cover time of random walks on a graph has found
                 wide practical applications in different fields of
                 computer science, such as crawling and searching on the
                 World Wide Web and query processing in sensor networks,
                 with the application effects dependent \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "93",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Sheth:2023:CDI,
  author =       "Paras Sheth and Ruocheng Guo and Lu Cheng and Huan Liu
                 and Kasim Sel{\c{c}}uk Candan",
  title =        "Causal Disentanglement for Implicit Recommendations
                 with Network Information",
  journal =      j-TKDD,
  volume =       "17",
  number =       "7",
  pages =        "94:1--94:??",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3582435",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 3 07:15:55 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3582435",
  abstract =     "Online user engagement is highly influenced by various
                 machine learning models, such as recommender systems.
                 These systems recommend new items to the user based on
                 the user's historical interactions. Implicit
                 recommender systems reflect a binary setting \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "94",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2023:ESM,
  author =       "Yihong Zhang and Xiu Susie Fang and Takahiro Hara",
  title =        "Evolving Social Media Background Representation with
                 Frequency Weights and Co-Occurrence Graphs",
  journal =      j-TKDD,
  volume =       "17",
  number =       "7",
  pages =        "95:1--95:??",
  month =        aug,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3585389",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 3 07:15:55 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3585389",
  abstract =     "Social media as a background information source has
                 been utilized in many practical computational tasks,
                 such as stock price prediction, epidemic tracking, and
                 product recommendation. However, proper representation
                 of an evolving social media background \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "95",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2023:NWV,
  author          = {Huiru Li and Liangxiao Jiang and Siqing Xue},
  title           = {Neighborhood Weighted Voting-Based Noise Correction
                     for Crowdsourcing},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {96:1--96:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3586998},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3586998},
  abstract        = {In crowdsourcing scenarios, we can obtain each
                     instance's multiple noisy labels set from different
                     crowd workers and then use a ground truth inference
                     algorithm to infer its integrated label. Despite the
                     effectiveness of ground truth inference algorithms,
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {96},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Li:2023:DNE,
  author          = {He Li and Duo Jin and Xuejiao Li and Jianbin Huang and
                     Xiaoke Ma and Jiangtao Cui and Deshuang Huang and
                     Shaojie Qiao and Jaesoo Yoo},
  title           = {{DMGF-Net}: an Efficient Dynamic Multi-Graph Fusion
                     Network for Traffic Prediction},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {97:1--97:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3586164},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3586164},
  abstract        = {Traffic prediction is the core task of intelligent
                     transportation system (ITS) and accurate traffic
                     prediction can greatly improve the utilization of
                     public resources. Dynamic interaction of multiple
                     spatial relationships will influence the accuracy of
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {97},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhao:2023:CDC,
  author          = {Boxiang Zhao and Shuliang Wang and Lianhua Chi and Qi
                     Li and Xiaojia Liu and Jing Geng},
  title           = {Causal Discovery via Causal Star Graphs},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {98:1--98:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3586997},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3586997},
  abstract        = {Discovering causal relationships among observed
                     variables is an important research focus in data
                     mining. Existing causal discovery approaches are mainly
                     based on constraint-based methods and functional causal
                     models (FCMs). However, the constraint-based \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {98},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2023:GDL,
  author          = {Dexian Wang and Tianrui Li and Ping Deng and Fan Zhang
                     and Wei Huang and Pengfei Zhang and Jia Liu},
  title           = {A Generalized Deep Learning Clustering Algorithm Based
                     on Non-Negative Matrix Factorization},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {99:1--99:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3584862},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3584862},
  abstract        = {Clustering is a popular research topic in the field of
                     data mining, in which the clustering method based on
                     non-negative matrix factorization (NMF) has been widely
                     employed. However, in the update process of NMF, there
                     is no learning rate to guide the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {99},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Biswas:2023:RIM,
  author          = {Tarun Kumer Biswas and Alireza Abbasi and Ripon Kumar
                     Chakrabortty},
  title           = {Robust Influence Maximization Under Both Aleatory and
                     Epistemic Uncertainty},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {100:1--100:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3587100},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3587100},
  abstract        = {Uncertainty is ubiquitous in almost every real-life
                     optimization problem, which must be effectively managed
                     to get a robust outcome. This is also true for the
                     Influence Maximization (IM) problem, which entails
                     locating a set of influential users within a \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {100},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Guo:2023:DAD,
  author          = {Yuhui Guo and Xun Liang and Bo Wu and Xiangping Zheng
                     and Xuan Zhang},
  title           = {Dual-aware Domain Mining and Cross-aware Supervision
                     for Weakly-supervised Semantic Segmentation},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {101:1--101:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589343},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589343},
  abstract        = {Weakly Supervised Semantic Segmentation with
                     image-level annotation uses localization maps from the
                     classifier to generate pseudo labels. However, such
                     localization maps focus only on sparse salient object
                     regions, it is difficult to generate high-quality
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {101},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Cheng:2023:FIU,
  author          = {Jiezhu Cheng and Kaizhu Huang and Zibin Zheng},
  title           = {Fitting Imbalanced Uncertainties in Multi-output Time
                     Series Forecasting},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {102:1--102:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3584704},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3584704},
  abstract        = {We focus on multi-step ahead time series forecasting
                     with the multi-output strategy. From the perspective of
                     multi-task learning (MTL), we recognize imbalanced
                     uncertainties between prediction tasks of different
                     future time steps. Unexpectedly, trained by \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {102},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhang:2023:MVE,
  author          = {Xuanqi Zhang and Qiangqiang Shen and Yongyong Chen and
                     Guokai Zhang and Zhongyun Hua and Jingyong Su},
  title           = {Multi-view Ensemble Clustering via Low-rank and Sparse
                     Decomposition: From Matrix to Tensor},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {103:1--103:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589768},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589768},
  abstract        = {As a significant extension of classical clustering
                     methods, ensemble clustering first generates multiple
                     basic clusterings and then fuses them into one
                     consensus partition by solving a problem concerning
                     graph partition with respect to the co-association
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {103},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhang:2023:DDQ,
  author          = {Sensen Zhang and Xun Liang and Hui Tang and Xiangping
                     Zheng and Alex X. Zhang and Yuefeng Ma},
  title           = {{DuCape}: Dual Quaternion and Capsule Network-Based
                     Temporal Knowledge Graph Embedding},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {104:1--104:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589644},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589644},
  abstract        = {Recently, with the development of temporal knowledge
                     graph technology, more and more Temporal Knowledge
                     Graph Embedded (TKGE) models have been developed. The
                     effectiveness of TKGE largely depends on the ability to
                     model intrinsic relation patterns and \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {104},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2023:TID,
  author          = {Hao Wang and Bin Guo and Jiaqi Liu and Yasan Ding and
                     Zhiwen Yu},
  title           = {Towards Informative and Diverse Dialogue Systems Over
                     Hierarchical Crowd Intelligence Knowledge Graph},
  journal         = j-TKDD,
  volume          = {17},
  number          = {7},
  pages           = {105:1--105:??},
  month           = aug,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3583758},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:55 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3583758},
  abstract        = {Knowledge-enhanced dialogue systems aim at generating
                     factually correct and coherent responses by reasoning
                     over knowledge sources, which is a promising research
                     trend. The truly harmonious human-agent dialogue
                     systems need to conduct engaging \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {105},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Ragab:2023:ABS,
  author          = {Mohamed Ragab and Emadeldeen Eldele and Wee Ling Tan
                     and Chuan-Sheng Foo and Zhenghua Chen and Min Wu and
                     Chee-Keong Kwoh and Xiaoli Li},
  title           = {{ADATIME}: a Benchmarking Suite for Domain Adaptation
                     on Time Series Data},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {106:1--106:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3587937},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3587937},
  abstract        = {Unsupervised domain adaptation methods aim at
                     generalizing well on unlabeled test data that may have
                     a different (shifted) distribution from the training
                     data. Such methods are typically developed on image
                     data, and their application to time series data
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {106},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Halstead:2023:CDM,
  author          = {Ben Halstead and Yun Sing Koh and Patricia Riddle and
                     Mykola Pechenizkiy and Albert Bifet},
  title           = {Combining Diverse Meta-Features to Accurately Identify
                     Recurring Concept Drift in Data Streams},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {107:1--107:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3587098},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3587098},
  abstract        = {Learning from streaming data is challenging as the
                     distribution of incoming data may change over time, a
                     phenomenon known as concept drift. The predictive
                     patterns, or experience learned under one distribution
                     may become irrelevant as conditions change \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {107},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% Only the proper noun ``Fisher'' is brace-protected in the title, so
%%% that bibliography styles remain free to recase the other words.
@Article{Shi:2023:KFD,
  author =       "Linrui Shi and Zheng Zhang and Zizhu Fan and Chao Xi
                 and Zhengming Li and Gaochang Wu",
  title =        "Kernel {Fisher} Dictionary Transfer Learning",
  journal =      j-TKDD,
  volume =       "17",
  number =       "8",
  pages =        "108:1--108:??",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3588575",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 3 07:15:57 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3588575",
  abstract =     "Dictionary learning is an efficient knowledge
                 representation method that can learn the essential
                 features of data. Traditional dictionary learning
                 methods are difficult to obtain nonlinear information
                 when processing large-scale and high-dimensional
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "108",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% NOTE(review): ``Motisf-aware'' in the title looks like a misspelling
%%% of ``Motif-aware''; the title is kept exactly as recorded here ---
%%% confirm against the publisher's record before altering it.
@Article{Sun:2023:GNN,
  author          = {Heli Sun and Miaomiao Sun and Xuechun Liu and Linlin
                     Zhu and Liang He and Xiaolin Jia and Yuan Chen},
  title           = {Graph Neural Networks with Motisf-aware for Tenuous
                     Subgraph Finding},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {109:1--109:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589643},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589643},
  abstract        = {Tenuous subgraph finding aims to detect a subgraph
                     with few social interactions and weak relationships
                     among nodes. Despite significant efforts made on this
                     task, they are mostly carried out in view of
                     graph-structured data. These methods depend on
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {109},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wu:2023:LES,
  author          = {Likang Wu and Hongke Zhao and Zhi Li and Zhenya Huang
                     and Qi Liu and Enhong Chen},
  title           = {Learning the Explainable Semantic Relations via
                     Unified Graph Topic-Disentangled Neural Networks},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {110:1--110:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3589964},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3589964},
  abstract        = {Graph Neural Networks (GNNs) such as Graph
                     Convolutional Networks (GCNs) can effectively learn
                     node representations via aggregating neighbors based on
                     the relation graph. However, despite a few exceptions,
                     most of the previous work in this line does not
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {110},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Jia:2023:SSB,
  author          = {Bohan Jia and Jian Cao and Shiyou Qian and Nengjun Zhu
                     and Xin Dong and Liang Zhang and Lei Cheng and Linjian
                     Mo},
  title           = {{SMONE}: a Session-based Recommendation Model Based on
                     Neighbor Sessions with Similar Probabilistic
                     Intentions},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {111:1--111:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3587099},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3587099},
  abstract        = {A session-based recommendation system (SRS) tries to
                     predict the next possible choice of anonymous users. In
                     recent years, graph neural network (GNN) models have
                     been successfully applied to SRSs and have achieved
                     great success. Using GNN models in SRSs, \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {111},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Hassan:2023:CGD,
  author          = {Zohair Raza Hassan and Sarwan Ali and Imdadullah Khan
                     and Mudassir Shabbir and Waseem Abbas},
  title           = {Computing Graph Descriptors on Edge Streams},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {112:1--112:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3591468},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3591468},
  abstract        = {Feature extraction is an essential task in graph
                     analytics. These feature vectors, called graph
                     descriptors, are used in downstream vector-space-based
                     graph analysis models. This idea has proved fruitful in
                     the past, with spectral-based graph descriptors
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {112},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Canfora:2023:NCT,
  author          = {Gerardo Canfora and Francesco Mercaldo and Antonella
                     Santone},
  title           = {A Novel Classification Technique based on Formal
                     Methods},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {113:1--113:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3592796},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3592796},
  abstract        = {In last years, we are witnessing a growing interest in
                     the application of supervised machine learning
                     techniques in the most disparate fields. One winning
                     factor of machine learning is represented by its
                     ability to easily create models, as it does not
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {113},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

%%% The abstract excerpt below is cut off mid-word (after ``self-'')
%%% just before the closing ellipsis.
@Article{Lin:2023:MVG,
  author =       "Bei Lin and You Li and Ning Gui and Zhuopeng Xu and
                 Zhiwu Yu",
  title =        "Multi-view Graph Representation Learning Beyond
                 Homophily",
  journal =      j-TKDD,
  volume =       "17",
  number =       "8",
  pages =        "114:1--114:??",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3592858",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 3 07:15:57 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3592858",
  abstract =     "Unsupervised graph representation learning (GRL) aims
                 at distilling diverse graph information into
                 task-agnostic embeddings without label supervision. Due
                 to a lack of support from labels, recent representation
                 learning methods usually adopt self- \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "114",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Tajeuna:2023:MRS,
  author          = {Etienne Gael Tajeuna and Mohamed Bouguessa and
                     Shengrui Wang},
  title           = {Modeling Regime Shifts in Multiple Time Series},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {115:1--115:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3592857},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3592857},
  abstract        = {We investigate the problem of discovering and modeling
                     regime shifts in an ecosystem comprising multiple time
                     series known as co-evolving time series. Regime shifts
                     refer to the changing behaviors exhibited by series at
                     different time intervals. Learning \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {115},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Shi:2023:ACS,
  author          = {Dan Shi and Lei Zhu and Xiao Dong and Xuemeng Song and
                     Jingjing Li and Zhiyong Cheng},
  title           = {Adaptive Collaborative Soft Label Learning for
                     Unsupervised Multi-View Feature Selection},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {116:1--116:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3591467},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3591467},
  abstract        = {Unsupervised multi-view feature selection aims to
                     select informative features with multi-view features
                     and unsupervised learning. It is a challenging problem
                     due to the absence of explicit semantic supervision.
                     Recently, graph theory and hard pseudo-label \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {116},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhang:2023:CIT,
  author          = {Hao Zhang and Yewei Xia and Kun Zhang and Shuigeng
                     Zhou and Jihong Guan},
  title           = {Conditional Independence Test Based on Residual
                     Similarity},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {117:1--117:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3593810},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3593810},
  abstract        = {Recently, many regression-based conditional
                     independence (CI) test methods have been proposed to
                     solve the problem of causal discovery. These methods
                     provide alternatives to test CI of x,y given Z by first
                     removing the information of the controlling set Z
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {117},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Yuan:2023:IVD,
  author          = {Junkun Yuan and Xu Ma and Ruoxuan Xiong and Mingming
                     Gong and Xiangyu Liu and Fei Wu and Lanfen Lin and Kun
                     Kuang},
  title           = {Instrumental Variable-Driven Domain Generalization
                     with Unobserved Confounders},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {118:1--118:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3595380},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3595380},
  abstract        = {Domain generalization (DG) aims to learn from multiple
                     source domains a model that can generalize well on
                     unseen target domains. Existing DG methods mainly learn
                     the representations with invariant marginal
                     distribution of the input features, however, the
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {118},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Qin:2023:CBI,
  author          = {Xi Qin and Cheng Zhong and Hai Xiang Lin},
  title           = {Community-Based Influence Maximization Using Network
                     Embedding in Dynamic Heterogeneous Social Networks},
  journal         = j-TKDD,
  volume          = {17},
  number          = {8},
  pages           = {119:1--119:??},
  month           = sep,
  year            = {2023},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3594544},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jul 3 07:15:57 MDT 2023},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3594544},
  abstract        = {Influence maximization (IM) is a very important issue
                     in social network diffusion analysis. The topology of
                     real social network is large-scale, dynamic, and
                     heterogeneous. The heterogeneity, and continuous
                     expansion and evolution of social network pose a
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {119},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhuang:2023:CLB,
  author =       "Jiabo Zhuang and Shunmei Meng and Jing Zhang and
                 Victor S. Sheng",
  title =        "Contrastive Learning Based Graph Convolution Network
                 for Social Recommendation",
  journal =      j-TKDD,
  volume =       "17",
  number =       "8",
  pages =        "120:1--120:??",
  month =        sep,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3587268",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jul 3 07:15:57 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3587268",
  abstract =     "Exploiting social networks is expected to enhance the
                 performance of recommender systems when interaction
                 information is sparse. Existing social recommendation
                 models focus on modeling multi-graph structures and
                 then aggregating the information from these \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "120",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2023:RNR,
  author =       "Liang Zhang and Cheng Long",
  title =        "Road Network Representation Learning: a Dual
                 Graph-based Approach",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "121:1--121:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3592859",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3592859",
  abstract =     "Road network is a critical infrastructure powering
                 many applications including transportation, mobility
                 and logistics in real life. To leverage the input of a
                 road network across these different applications, it is
                 necessary to learn the representations \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "121",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Syed:2023:SST,
  author =       "Tahir Syed and Behroz Mirza",
  title =        "Self-supervision for Tabular Data by Learning to
                 Predict Additive Homoskedastic {Gaussian} Noise as
                 Pretext",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "122:1--122:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3594720",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3594720",
  abstract =     "The lack of scalability of data annotation translates
                 to the need to decrease dependency on labels.
                 Self-supervision offers a solution with data training
                 themselves. However, it has received relatively less
                 attention on tabular data, data that drive a \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "122",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2023:HCT,
  author =       "Xiaona Li and Zhu Wang and Xindong Chen and Bin Guo
                 and Zhiwen Yu",
  title =        "A Hybrid Continuous-Time Dynamic Graph Representation
                 Learning Model by Exploring Both Temporal and
                 Repetitive Information",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "123:1--123:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3596447",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3596447",
  abstract =     "Recently, dynamic graph representation learning has
                 attracted more and more attention from both academic
                 and industrial communities due to its capabilities of
                 capturing different real-world phenomena. For a dynamic
                 graph represented as a sequence of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "123",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ou:2023:STS,
  author =       "Junjie Ou and Haiming Jin and Xiaocheng Wang and Hao
                 Jiang and Xinbing Wang and Chenghu Zhou",
  title =        "{STA-TCN}: Spatial-temporal Attention over Temporal
                 Convolutional Network for Next Point-of-interest
                 Recommendation",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "124:1--124:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3596497",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3596497",
  abstract =     "Recent years have witnessed a vastly increasing
                 popularity of location-based social networks (LBSNs),
                 which facilitates studies on the next Point-of-Interest
                 (POI) recommendation problem. A user's POI visiting
                 behavior shows the sequential transition. \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "124",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jiang:2023:MLS,
  author =       "Shaowei Jiang and Wei He and Lizhen Cui and Yonghui Xu
                 and Lei Liu",
  title =        "Modeling Long- and Short-Term User Preferences via
                 Self-Supervised Learning for Next {POI}
                 Recommendation",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "125:1--125:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3597211",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597211",
  abstract =     "With the accumulation of check-in data from
                 location-based services, next Point-of-Interest (POI)
                 recommendations are gaining increasing attention. It is
                 well known that the spatio-temporal contextual
                 information of user check-in behavior plays a crucial
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "125",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Jang:2023:AOS,
  author =       "Jun-Gi Jang and Sooyeon Shim and Vladimir Egay and
                 Jeeyong Lee and Jongmin Park and Suhyun Chae and U.
                 Kang",
  title =        "Accurate Open-Set Recognition for Memory Workload",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "126:1--126:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3597027",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597027",
  abstract =     "How can we accurately identify new memory workloads
                 while classifying known memory workloads? Verifying
                 DRAM (Dynamic Random Access Memory) using various
                 workloads is an important task to guarantee the quality
                 of DRAM. A crucial component in the process \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "126",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Qin:2023:TBT,
  author =       "Meng Qin and Chaorui Zhang and Bo Bai and Gong Zhang
                 and Dit-Yan Yeung",
  title =        "Towards a Better Tradeoff between Quality and
                 Efficiency of Community Detection: an Inductive
                 Embedding Method across Graphs",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "127:1--127:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3596605",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3596605",
  abstract =     "Many network applications can be formulated as NP-hard
                 combinatorial optimization problems of community
                 detection (CD) that partitions nodes of a graph into
                 several groups with dense linkage. Most existing CD
                 methods are transductive, which independently
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "127",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2023:LRR,
  author =       "Haoran Chen and Xu Chen and Hongwei Tao and Zuhe Li
                 and Xiao Wang",
  title =        "Low-rank Representation with Adaptive Dimensionality
                 Reduction via Manifold Optimization for Clustering",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "128:1--128:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3589767",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3589767",
  abstract =     "The dimensionality reduction techniques are often used
                 to reduce data dimensionality for computational
                 efficiency or other purposes in existing low-rank
                 representation (LRR)-based methods. However, the two
                 steps of dimensionality reduction and learning
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "128",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Liu:2023:TTP,
  author =       "Ye Liu and Han Wu and Zhenya Huang and Hao Wang and
                 Yuting Ning and Jianhui Ma and Qi Liu and Enhong Chen",
  title =        "{TechPat}: Technical Phrase Extraction for Patent
                 Mining",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "129:1--129:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3596603",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3596603",
  abstract =     "In recent years, due to the explosive growth of patent
                 applications, patent mining has drawn extensive
                 attention and interest. An important issue of patent
                 mining is that of recognizing the technologies
                 contained in patents, which serves as a fundamental
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "129",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Wang:2023:MRA,
  author =       "Chunyang Wang and Yanmin Zhu and Haobing Liu and
                 Tianzi Zang and Ke Wang and Jiadi Yu",
  title =        "Multifaceted Relation-aware Meta-learning with Dual
                 Customization for User Cold-start Recommendation",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "130:1--130:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3597458",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597458",
  abstract =     "User cold-start scenarios pose great challenges to
                 recommendation systems in accurately capturing user
                 preferences with sparse interaction records. Besides
                 incorporating auxiliary information to enrich user/item
                 representations, recent studies under the \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "130",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Yu:2023:ITB,
  author =       "Zhiwen Yu and Minling Dang and Qilong Wu and Liming
                 Chen and Yujin Xie and Yu Wang and Bin Guo",
  title =        "An Information Theory Based Method for Quantifying the
                 Predictability of Human Mobility",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "131:1--131:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3597500",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597500",
  abstract =     "Research on human mobility drives the development of
                 economy and society. How to predict when and where one
                 will go accurately is one of the core research
                 questions. Existing work is mainly concerned with
                 performance of mobility prediction models. Since
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "131",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ramezani:2023:JID,
  author =       "Maryam Ramezani and Aryan Ahadinia and Amirmohammad
                 Ziaei Bideh and Hamid R. Rabiee",
  title =        "Joint Inference of Diffusion and Structure in
                 Partially Observed Social Networks Using Coupled Matrix
                 Factorization",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "132:1--132:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3599237",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3599237",
  abstract =     "Access to complete data in large-scale networks is
                 often infeasible. Therefore, the problem of missing
                 data is a crucial and unavoidable issue in the analysis
                 and modeling of real-world social networks. However,
                 most of the research on different aspects \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "132",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2023:SIM,
  author =       "Yandi Li and Haobo Gao and Yunxuan Gao and Jianxiong
                 Guo and Weili Wu",
  title =        "A Survey on Influence Maximization: From an {ML}-Based
                 Combinatorial Optimization",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "133:1--133:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604559",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604559",
  abstract =     "Influence Maximization (IM) is a classical
                 combinatorial optimization problem, which can be widely
                 used in mobile networks, social computing, and
                 recommendation systems. It aims at selecting a small
                 number of users such that maximizing the influence
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "133",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2023:MLF,
  author =       "Zan Zhang and Zhe Zhang and Jialu Yao and Lin Liu and
                 Jiuyong Li and Gongqing Wu and Xindong Wu",
  title =        "Multi-Label Feature Selection Via Adaptive Label
                 Correlation Estimation",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "134:1--134:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604560",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604560",
  abstract =     "In multi-label learning, each instance is associated
                 with multiple labels simultaneously. Multi-label data
                 often have noisy, irrelevant, and redundant features of
                 high dimensionality. Multi-label feature selection has
                 received considerable attention as an \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "134",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Ren:2023:CCG,
  author =       "Siyuan Ren and Bin Guo and Ke Li and Qianru Wang and
                 Qinfen Wang and Zhiwen Yu",
  title =        "{CoupledGT}: Coupled Geospatial-temporal Data Modeling
                 for Air Quality Prediction",
  journal =      j-TKDD,
  volume =       "17",
  number =       "9",
  pages =        "135:1--135:??",
  month =        nov,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604616",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Sat Aug 19 07:15:21 MDT 2023",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604616",
  abstract =     "Air pollution seriously affects public health, while
                 effective air quality prediction remains a challenging
                 problem since the complex spatial-temporal couplings
                 exist in multi-area monitoring data of the city.
                 Current approaches rarely consider relative \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "135",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2024:HHG,
  author =       "Youru Li and Zhenfeng Zhu and Xiaobo Guo and Shaoshuai
                 Li and Yuchen Yang and Yao Zhao",
  title =        "{HGV4Risk}: Hierarchical Global View-guided Sequence
                 Representation Learning for Risk Prediction",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "1:1--1:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3605895",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3605895",
  abstract =     "Risk prediction, usually achieved by learning
                 representations from patient's physiological sequence
                 or user's behavioral sequence data, and has been widely
                 applied in healthcare and finance. Despite that, some
                 recent time-aware deep learning methods have \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "1",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Amagata:2024:EDP,
  author =       "Daichi Amagata and Takahiro Hara",
  title =        "Efficient Density-peaks Clustering Algorithms on
                 Static and Dynamic Data in {Euclidean} Space",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "2:1--2:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3607873",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3607873",
  abstract =     "Clustering multi-dimensional points is a fundamental
                 task in many fields, and density-based clustering
                 supports many applications because it can discover
                 clusters of arbitrary shapes. This article addresses
                 the problem of Density-Peaks Clustering (DPC) in
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "2",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Deng:2024:TNF,
  author =       "Jiewen Deng and Jinliang Deng and Du Yin and Renhe
                 Jiang and Xuan Song",
  title =        "{TTS-Norm}: Forecasting Tensor Time Series via
                 Multi-Way Normalization",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "3:1--3:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3605894",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3605894",
  abstract =     "Tensor time series (TTS) data, a generalization of
                 one-dimensional time series on a high-dimensional
                 space, is ubiquitous in real-world applications.
                 Compared to modeling time series or multivariate time
                 series, which has received much attention and
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "3",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Moreo:2024:MLQ,
  author =       "Alejandro Moreo and Manuel Francisco and Fabrizio
                 Sebastiani",
  title =        "Multi-Label Quantification",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "4:1--4:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3606264",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3606264",
  abstract =     "Quantification, variously called supervised prevalence
                 estimation or learning to quantify, is the supervised
                 learning task of generating predictors of the relative
                 frequencies (a.k.a. prevalence values) of the classes
                 of interest in unlabelled data \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "4",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Zhang:2024:HSF,
  author =       "Chunkai Zhang and Yuting Yang and Zilin Du and
                 Wensheng Gan and Philip S. Yu",
  title =        "{HUSP-SP}: Faster Utility Mining on Sequence Data",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3597935",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3597935",
  abstract =     "High-utility sequential pattern mining (HUSPM) has
                 emerged as an important topic due to its wide
                 application and considerable popularity. However, due
                 to the combinatorial explosion of the search space when
                 the HUSPM problem encounters a low-utility \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "5",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Chen:2024:MVG,
  author =       "Zhaoliang Chen and Lele Fu and Shunxin Xiao and
                 Shiping Wang and Claudia Plant and Wenzhong Guo",
  title =        "Multi-View Graph Convolutional Networks with
                 Differentiable Node Selection",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "6:1--6:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3608954",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3608954",
  abstract =     "Multi-view data containing complementary and consensus
                 information can facilitate representation learning by
                 exploiting the intact integration of multi-view
                 features. Because most objects in the real world often
                 have underlying connections, organizing \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "6",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Luo:2024:DLC,
  author =       "Fangyuan Luo and Jun Wu and Tao Wang",
  title =        "Discrete Listwise Content-aware Recommendation",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "7:1--7:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3609334",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3609334",
  abstract =     "To perform online inference efficiently, hashing
                 techniques, devoted to encoding model parameters as
                 binary codes, play a key role in reducing the
                 computational cost of content-aware recommendation
                 (CAR), particularly on devices with limited computation
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "7",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2024:SGC,
  author =       "Huiyuan Li and Li Yu and Xi Niu and Youfang Leng and
                 Qihan Du",
  title =        "Sequential and Graphical Cross-Domain Recommendations
                 with a Multi-View Hierarchical Transfer Gate",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "8:1--8:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3604615",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3604615",
  abstract =     "Cross-domain recommender systems could potentially
                 improve the recommendation performance by means of
                 transferring abundant knowledge from the auxiliary
                 domain to the target domain. They could help address
                 some key challenges in recommender systems, such
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "8",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 18, number 1 (January 2024), article 9:
%%% federated reinforcement learning.
@article{Mai:2024:SCC,
  author          = {Weiming Mai and Jiangchao Yao and Gong Chen and Ya
                     Zhang and Yiu-Ming Cheung and Bo Han},
  title           = {Server-Client Collaborative Distillation for Federated
                     Reinforcement Learning},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {9},
  pages           = {9:1--9:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3604939},
  url             = {https://dl.acm.org/doi/10.1145/3604939},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Federated Learning (FL) learns a global model in a
                     distributional manner, which does not require local
                     clients to share private data. Such merit has drawn
                     lots of attention in the interaction scenarios, where
                     Federated Reinforcement Learning (FRL) emerges
                     \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 10:
%%% fairness in recommender systems.
@article{Wu:2024:FRS,
  author          = {Yao Wu and Jian Cao and Guandong Xu},
  title           = {Fairness in Recommender Systems: Evaluation Approaches
                     and Assurance Strategies},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {10},
  pages           = {10:1--10:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3604558},
  url             = {https://dl.acm.org/doi/10.1145/3604558},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {With the wide application of recommender systems, the
                     potential impacts of recommender systems on customers,
                     item providers and other parties have attracted
                     increasing attention. Fairness, which is the quality of
                     treating people equally, is also becoming \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 11:
%%% link prediction in heterogeneous social networks.
@article{Wang:2024:TTD,
  author          = {Huan Wang and Guoquan Liu and Po Hu},
  title           = {{TDAN}: Transferable Domain Adversarial Network for
                     Link Prediction in Heterogeneous Social Networks},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {11},
  pages           = {11:1--11:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3610229},
  url             = {https://dl.acm.org/doi/10.1145/3610229},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Link prediction has received increased attention in
                     social network analysis. One of the unique challenges
                     in heterogeneous social networks is link prediction in
                     new link types without verified link information, such
                     as recommending products to new \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 12:
%%% authorship analysis.
@article{Corbara:2024:SDD,
  author          = {Silvia Corbara and Alejandro Moreo and Fabrizio
                     Sebastiani},
  title           = {Same or Different? {Diff}-Vectors for Authorship
                     Analysis},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {12},
  pages           = {12:1--12:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3609226},
  url             = {https://dl.acm.org/doi/10.1145/3609226},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In this article, we investigate the effects on
                     authorship identification tasks (including authorship
                     verification, closed-set authorship attribution, and
                     closed-set and open-set same-author verification) of a
                     fundamental shift in how to conceive the \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 13:
%%% heterophily in graph representation learning.
@article{Huang:2024:RRH,
  author          = {Jincheng Huang and Ping Li and Rui Huang and Na Chen
                     and Acong Zhang},
  title           = {Revisiting the Role of Heterophily in Graph
                     Representation Learning: an Edge Classification
                     Perspective},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {13},
  pages           = {13:1--13:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3603378},
  url             = {https://dl.acm.org/doi/10.1145/3603378},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Graph representation learning aims at integrating node
                     contents with graph structure to learn nodes/graph
                     representations. Nevertheless, it is found that many
                     existing graph learning methods do not work well on
                     data with high heterophily level that \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 14:
%%% multi-behavior implicit recommendation.
@article{Luo:2024:CBH,
  author          = {Xiao Luo and Daqing Wu and Yiyang Gu and Chong Chen
                     and Luchen Liu and Jinwen Ma and Ming Zhang and Minghua
                     Deng and Jianqiang Huang and Xian-Sheng Hua},
  title           = {Criterion-based Heterogeneous Collaborative Filtering
                     for Multi-behavior Implicit Recommendation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {14},
  pages           = {14:1--14:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3611310},
  url             = {https://dl.acm.org/doi/10.1145/3611310},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Recent years have witnessed the explosive growth of
                     interaction behaviors in multimedia information
                     systems, where multi-behavior recommender systems have
                     received increasing attention by leveraging data from
                     various auxiliary behaviors such as tip and \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 15:
%%% conversational recommendation.
@article{Liu:2024:DDP,
  author          = {Huiting Liu and Yu Zhang and Peipei Li and Cheng Qian
                     and Peng Zhao and Xindong Wu},
  title           = {{DeepCPR}: Deep Path Reasoning Using Sequence of
                     User-Preferred Attributes for Conversational
                     Recommendation},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {15},
  pages           = {15:1--15:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3610775},
  url             = {https://dl.acm.org/doi/10.1145/3610775},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Conversational recommender systems (CRS) have garnered
                     significant attention in academia and industry because
                     of their ability to capture user preferences via system
                     questions and user responses. Typically, in a CRS,
                     reinforcement learning (RL) is \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 16:
%%% graph attention network for stock prediction.
@article{Zhang:2024:DAD,
  author          = {Qiuyue Zhang and Yunfeng Zhang and Xunxiang Yao and
                     Shilong Li and Caiming Zhang and Peide Liu},
  title           = {A Dynamic Attributes-driven Graph Attention Network
                     Modeling on Behavioral Finance for Stock Prediction},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {16},
  pages           = {16:1--16:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3611311},
  url             = {https://dl.acm.org/doi/10.1145/3611311},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Stock prediction is a challenging task due to multiple
                     influencing factors and complex market dependencies.
                     Traditional solutions are based on a single type of
                     information. With the success of multi-source
                     information in different fields, the combination
                     \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 17:
%%% node classification accuracy of GNNs.
@article{Chowdhury:2024:INC,
  author          = {Anjan Chowdhury and Sriram Srinivasan and Animesh
                     Mukherjee and Sanjukta Bhowmick and Kuntal Ghosh},
  title           = {Improving Node Classification Accuracy of {GNN}
                     through Input and Output Intervention},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {17},
  pages           = {17:1--17:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3610535},
  url             = {https://dl.acm.org/doi/10.1145/3610535},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Graph Neural Networks (GNNs) are a popular machine
                     learning framework for solving various graph processing
                     applications. This framework exploits both the graph
                     topology and the feature vectors of the nodes. One of
                     the important applications of GNN is in \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 18:
%%% self-supervised dynamic graph representation learning.
@article{Chen:2024:SSD,
  author          = {Ke-Jia Chen and Linsong Liu and Linpu Jiang and
                     Jingqiang Chen},
  title           = {Self-Supervised Dynamic Graph Representation Learning
                     via Temporal Subgraph Contrast},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {18},
  pages           = {18:1--18:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3612931},
  url             = {https://dl.acm.org/doi/10.1145/3612931},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Self-supervised learning on graphs has recently drawn
                     a lot of attention due to its independence from labels
                     and its robustness in representation. Current studies
                     on this topic mainly use static information such as
                     graph structures but cannot well capture \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 19:
%%% t-SNE for high-dimensional data visualization.
@article{Sun:2024:LBC,
  author          = {Yan Sun and Yi Han and Jicong Fan},
  title           = {{Laplacian}-based Cluster-Contractive $t$-{SNE} for
                     High-Dimensional Data Visualization},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {19},
  pages           = {19:1--19:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3612932},
  url             = {https://dl.acm.org/doi/10.1145/3612932},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Dimensionality reduction techniques aim at
                     representing high-dimensional data in low-dimensional
                     spaces to extract hidden and useful information or
                     facilitate visual understanding and interpretation of
                     the data. However, few of them take into \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 20:
%%% data publishing privacy and transparency.
@article{Ge:2024:DCC,
  author          = {Yong-Feng Ge and Elisa Bertino and Hua Wang and Jinli
                     Cao and Yanchun Zhang},
  title           = {Distributed Cooperative Coevolution of Data Publishing
                     Privacy and Transparency},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {20},
  pages           = {20:1--20:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3613962},
  url             = {https://dl.acm.org/doi/10.1145/3613962},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Data transparency is beneficial to data participants'
                     awareness, users' fairness, and research work's
                     reproducibility. However, when addressing transparency
                     requirements, we cannot ignore data privacy. This
                     article defines the multi-objective data \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 21.
%%% The third author's family name is the two-word Spanish surname
%%% "Gomez Marmol"; it is entered in "Last, First" form so that BibTeX
%%% does not treat "Gomez" as part of the given name when sorting,
%%% abbreviating, or building labels.
@Article{Strukova:2024:AKI,
  author =       "Sofia Strukova and Jos{\'e} A. Ruip{\'e}rez-Valiente
                 and G{\'o}mez M{\'a}rmol, F{\'e}lix",
  title =        "Adapting Knowledge Inference Algorithms to Measure
                 Geometry Competencies through a Puzzle Game",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "21:1--21:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3614436",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3614436",
  abstract =     "The rapid technological evolution of the last years
                 has motivated students to develop capabilities that
                 will prepare them for an unknown future in the 21st
                 century. In this context, many teachers intend to
                 optimise the learning process, making it more
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "21",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 18, number 1 (January 2024), article 22:
%%% transfer learning with auxiliary information.
@article{Liu:2024:ETL,
  author          = {Bo Liu and Liangjiao Li and Yanshan Xiao and Kai Wang
                     and Jian Hu and Junrui Liu and Qihang Chen and Ruiguang
                     Huang},
  title           = {An Efficient Transfer Learning Method with Auxiliary
                     Information},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {22},
  pages           = {22:1--22:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3612930},
  url             = {https://dl.acm.org/doi/10.1145/3612930},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Transfer learning (TL) is an information reuse
                     learning tool, which can help us learn better
                     classification effect than traditional single task
                     learning, because transfer learning can share
                     information within the task-to-task model. Most TL
                     algorithms are \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 23:
%%% survey on explainable anomaly detection.
@article{Li:2024:SEA,
  author          = {Zhong Li and Yuxuan Zhu and Matthijs {Van Leeuwen}},
  title           = {A Survey on Explainable Anomaly Detection},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {23},
  pages           = {23:1--23:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3609333},
  url             = {https://dl.acm.org/doi/10.1145/3609333},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {In the past two decades, most research on anomaly
                     detection has focused on improving the accuracy of the
                     detection, while largely ignoring the explainability of
                     the corresponding methods and thus leaving the
                     explanation of outcomes to practitioners. As \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 24:
%%% transfer learning across graph convolutional networks.
@article{Jiang:2024:TLA,
  author          = {Meng Jiang},
  title           = {Transfer Learning across Graph Convolutional Networks:
                     Methods, Theory, and Applications},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {24},
  pages           = {24:1--24:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3617376},
  url             = {https://dl.acm.org/doi/10.1145/3617376},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Graph neural networks have been widely used for
                     learning representations of nodes for many downstream
                     tasks on graph data. Existing models were designed for
                     the nodes on a single graph, which would not be able to
                     utilize information across multiple \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 25:
%%% high utility co-location pattern mining.
@article{Wang:2024:CQM,
  author          = {Lizhen Wang and Vanha Tran and Thanhcong Do},
  title           = {A Clique-Querying Mining Framework for Discovering
                     High Utility Co-Location Patterns without Generating
                     Candidates},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {25},
  pages           = {25:1--25:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3617378},
  url             = {https://dl.acm.org/doi/10.1145/3617378},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Groups of spatial features whose instances frequently
                     appear together in nearby areas are regarded as
                     prevalent co-location patterns (PCPs). Traditional PCP
                     mining ignores the significance of instances and
                     features. However, in reality, these instances
                     \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 26:
%%% users' curiosity in recommender systems.
@article{Fu:2024:MUC,
  author          = {Zhe Fu and Xi Niu},
  title           = {Modeling Users' Curiosity in Recommender Systems},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {26},
  pages           = {26:1--26:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3617598},
  url             = {https://dl.acm.org/doi/10.1145/3617598},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Today's recommender systems are criticized for
                     recommending items that are too obvious to arouse
                     users' interests. Therefore, the research community has
                     advocated some ``beyond accuracy'' evaluation metrics
                     such as novelty, diversity, and serendipity with
                     \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 27:
%%% overlapping graph clustering in attributed networks.
@article{Li:2024:OGC,
  author          = {Hui-Jia Li and Yuhao Feng and Chengyi Xia and Jie
                     Cao},
  title           = {Overlapping Graph Clustering in Attributed Networks
                     via Generalized Cluster Potential Game},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {27},
  pages           = {27:1--27:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3597436},
  url             = {https://dl.acm.org/doi/10.1145/3597436},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Overlapping graph clustering is essential to
                     understand the nature and behavior of real complex
                     systems including human interactions, technical systems
                     and transportation network. However, in addition of
                     topological structure, many real-world networked
                     \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 28:
%%% mobile user profiling with an urban knowledge graph.
@article{Liu:2024:UKG,
  author          = {Yu Liu and Zhilun Zhou and Yong Li and Depeng Jin},
  title           = {Urban Knowledge Graph Aided Mobile User Profiling},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {28},
  pages           = {28:1--28:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3596604},
  url             = {https://dl.acm.org/doi/10.1145/3596604},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Nowadays, the explosive growth of personalized web
                     applications and the rapid development of artificial
                     intelligence technology have flourished the recent
                     research on mobile user profiling, i.e., inferring the
                     user profile from mobile behavioral data. \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1 (January 2024), article 29.
%%% NOTE(review): six of the seven author names were entered
%%% family-name-first without a comma, so BibTeX took the given names
%%% (Kaibiao, Ruicong, Fan, ...) as surnames.  They are now written
%%% given-name-first, like the rest of this file -- confirm the name order
%%% against the published article.  The citation key still begins with
%%% "Kaibiao" on purpose, so that existing \cite commands keep working.
@Article{Kaibiao:2024:ANG,
  author =       "Kaibiao Lin and Jinpo Chen and Ruicong Chen and Fan
                 Yang and Yang Zhang and Min Lin and Ping Lu",
  title =        "Adaptive Neighbor Graph Aggregated Graph Attention
                 Network for Heterogeneous Graph Embedding",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  pages =        "29:1--29:??",
  month =        jan,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3616377",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:43 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3616377",
  abstract =     "Graph attention network can generate effective feature
                 embedding by specifying different weights to different
                 nodes. The key of the research on heterogeneous graph
                 embedding is the way to combine its rich structural
                 information with semantic relations to \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "29",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 18, number 1 (January 2024), article 30:
%%% pre-training model for knowledge graph completion.
@article{Wang:2024:MMG,
  author          = {Yashen Wang and Xiaoye Ouyang and Dayu Guo and
                     Xiaoling Zhu},
  title           = {{MEGA}: Meta-Graph Augmented Pre-Training Model for
                     Knowledge Graph Completion},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {1},
  articleno       = {30},
  pages           = {30:1--30:??},
  month           = jan,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3617379},
  url             = {https://dl.acm.org/doi/10.1145/3617379},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {Nowadays, a large number of Knowledge Graph Completion
                     (KGC) methods have been proposed by using embedding
                     based manners, to overcome the incompleteness problem
                     faced with knowledge graph (KG). One important recent
                     innovation in Natural Language \ldots{}},
  bibdate         = {Mon Jan 15 11:01:43 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

%%% TKDD volume 18, number 1, article 31.
%%% This entry was recorded in a different layout from the other volume 18,
%%% number 1 entries.  The pages (now prefixed with the article number),
%%% ISSN, CODEN, URL, ajournal, and journal-URL values below have been
%%% brought into line with those entries.
%%% NOTE(review): month/year (October 2023) differ from the January 2024
%%% date on the other number 1 entries -- presumably the online publication
%%% date; confirm before changing them.  The citation key is kept as is so
%%% that existing \cite commands keep working.
@Article{Wang:2023:SDR,
  author =       "Xiang Wang and Liping Jing and Huafeng Liu and Jian
                 Yu",
  title =        "Structure-Driven Representation Learning for Deep
                 Clustering",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  publisher =    "Association for Computing Machinery (ACM)",
  pages =        "31:1--31:25",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3623400",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:07:57 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3623400",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "31",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 18, number 1, article 32.
%%% This entry was recorded in a different layout from the other volume 18,
%%% number 1 entries.  The pages (now prefixed with the article number),
%%% ISSN, CODEN, URL, ajournal, and journal-URL values below have been
%%% brought into line with those entries.
%%% NOTE(review): month/year (October 2023) differ from the January 2024
%%% date on the other number 1 entries -- presumably the online publication
%%% date; confirm before changing them.  The citation key is kept as is so
%%% that existing \cite commands keep working.
@Article{Alam:2023:DIP,
  author =       "Md. Tanvir Alam and Chowdhury Farhan Ahmed and Md.
                 Samiullah and Carson Kai-Sang Leung",
  title =        "Discovering Interesting Patterns from Hypergraphs",
  journal =      j-TKDD,
  volume =       "18",
  number =       "1",
  publisher =    "Association for Computing Machinery (ACM)",
  pages =        "32:1--32:34",
  month =        oct,
  year =         "2023",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3622940",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:07:57 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3622940",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "32",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

%%% TKDD volume 18, number 2 (February 2024), article 33:
%%% misinformation detection on social media.
@article{Li:2024:ECB,
  author          = {Fangfang Li and Zhi Liu and Junwen Duan and Xingliang
                     Mao and Heyuan Shi and Shichao Zhang},
  title           = {Exploiting Conversation-Branch-Tweet {HyperGraph}
                     Structure to Detect Misinformation on Social Media},
  journal         = j-TKDD,
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  volume          = {18},
  number          = {2},
  articleno       = {33},
  pages           = {33:1--33:??},
  month           = feb,
  year            = {2024},
  doi             = {https://doi.org/10.1145/3610297},
  url             = {https://dl.acm.org/doi/10.1145/3610297},
  journal-url     = {https://dl.acm.org/loi/tkdd},
  issn            = {1556-4681 (print), 1556-472X (electronic)},
  issn-l          = {1556-4681},
  coden           = {????},
  abstract        = {The spread of misinformation on social media is a
                     serious issue that can have negative consequences for
                     public health and political stability. While detecting
                     and identifying misinformation can be challenging, many
                     attempts have been made to address this \ldots{}},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  acknowledgement = ack-nhfb,
}

@Article{Luo:2024:SSG,
  author          = {Xiao Luo and Wei Ju and Yiyang Gu and Zhengyang Mao
                     and Luchen Liu and Yuhui Yuan and Ming Zhang},
  title           = {Self-supervised Graph-level Representation Learning
                     with Adversarial Contrastive Learning},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {34:1--34:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3624018},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3624018},
  abstract        = {The recently developed unsupervised graph
                     representation learning approaches apply contrastive
                     learning into graph-structured data and achieve
                     promising performance. However, these methods mainly
                     focus on graph augmentation for positive samples, while
                     the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {34},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Miller:2024:ASP,
  author          = {Benjamin A. Miller and Zohair Shafi and Wheeler Ruml
                     and Yevgeniy Vorobeychik and Tina Eliassi-Rad and Scott
                     Alfeld},
  title           = {Attacking Shortest Paths by Cutting Edges},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {35:1--35:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3622941},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3622941},
  abstract        = {Identifying shortest paths between nodes in a network
                     is a common graph analysis problem that is important
                     for many applications involving routing of resources.
                     An adversary that can manipulate the graph structure
                     could alter traffic patterns to gain some \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {35},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Spinnato:2024:UTS,
  author          = {Francesco Spinnato and Riccardo Guidotti and Anna
                     Monreale and Mirco Nanni and Dino Pedreschi and Fosca
                     Giannotti},
  title           = {Understanding Any Time Series Classifier with a
                     Subsequence-based Explainer},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {36:1--36:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3624480},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3624480},
  abstract        = {The growing availability of time series data has
                     increased the usage of classifiers for this data type.
                     Unfortunately, state-of-the-art time series classifiers
                     are black-box models and, therefore, not usable in
                     critical domains such as healthcare or \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {36},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Yu:2024:FSE,
  author =       "Kui Yu and Zhaolong Ling and Lin Liu and Peipei Li and
                 Hao Wang and Jiuyong Li",
  title =        "Feature Selection for Efficient Local-to-global
                 {Bayesian} Network Structure Learning",
  journal =      j-TKDD,
  volume =       "18",
  number =       "2",
  pages =        "37:1--37:??",
  month =        feb,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3624479",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3624479",
  abstract =     "Local-to-global learning approach plays an essential
                 role in Bayesian network (BN) structure learning.
                 Existing local-to-global learning algorithms first
                 construct the skeleton of a DAG (directed acyclic
                 graph) by learning the MB (Markov blanket) or PC
                 (parents and children) \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "37",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Cai:2024:RDS,
  author          = {Ruichu Cai and Fengzhu Wu and Zijian Li and Jie Qiao
                     and Wei Chen and Yuexing Hao and Hao Gu},
  title           = {{REST}: Debiased Social Recommendation via
                     Reconstructing Exposure Strategies},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {38:1--38:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3624986},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3624986},
  abstract        = {The recommendation system, relying on historical
                     observational data to model the complex relationships
                     among users and items, has achieved great success in
                     real-world applications. Selection bias is one of the
                     most important issues of the existing \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {38},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Ye:2024:MDF,
  author          = {Xiaoqing Ye and Yang Sun and Dun Liu and Tianrui Li},
  title           = {A Multisource Data Fusion-based Heterogeneous Graph
                     Attention Network for Competitor Prediction},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {39:1--39:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3625101},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3625101},
  abstract        = {Competitor identification is an essential component of
                     corporate strategy. With the rapid development of
                     artificial intelligence, various data-mining
                     methodologies and frameworks have emerged to identify
                     competitors. In general, the competitiveness among
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {39},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zhang:2024:FPP,
  author          = {Taolin Zhang and Chengyuan Mai and Yaomin Chang and
                     Chuan Chen and Lin Shu and Zibin Zheng},
  title           = {{FedEgo}: Privacy-preserving Personalized Federated
                     Graph Learning with Ego-graphs},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {40:1--40:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3624017},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3624017},
  abstract        = {As special information carriers containing both
                     structure and feature information, graphs are widely
                     used in graph mining, e.g., Graph Neural Networks
                     (GNNs). However, graph data are stored separately in
                     multiple distributed parties in some practical
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {40},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Lu:2024:CTT,
  author          = {Xiangkui Lu and Jun Wu and Junheng Huang and Fangyuan
                     Luo and Jianbo Yuan},
  title           = {Co-Training-Teaching: a Robust Semi-Supervised
                     Framework for Review-Aware Rating Regression},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {41:1--41:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3625391},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3625391},
  abstract        = {Review-aware Rating Regression (RaRR) suffers the
                     severe challenge of extreme data sparsity as the
                     multi-modality interactions of ratings accompanied by
                     reviews are costly to obtain. Although some studies of
                     semi-supervised rating regression are proposed
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {41},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Zareie:2024:MDE,
  author          = {Ahmad Zareie and Rizos Sakellariou},
  title           = {Maximizing the Diversity of Exposure in Online Social
                     Networks by Identifying Users with Increased
                     Susceptibility to Persuasion},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {42:1--42:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3625826},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3625826},
  abstract        = {Individuals may have a range of opinions on
                     controversial topics. However, the ease of making
                     friendships in online social networks tends to create
                     groups of like-minded individuals, who propagate
                     messages that reinforce existing opinions and ignore
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {42},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Xu:2024:OWG,
  author          = {Hui Xu and Liyao Xiang and Junjie Ou and Yuting Weng
                     and Xinbing Wang and Chenghu Zhou},
  title           = {Open-World Graph Active Learning for Node
                     Classification},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {43:1--43:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3607144},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3607144},
  abstract        = {The great power of Graph Neural Networks (GNNs) relies
                     on a large number of labeled training data, but
                     obtaining the labels can be costly in many cases. Graph
                     Active Learning (GAL) is proposed to reduce such
                     annotation costs, but the existing methods \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {43},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Chen:2024:SSH,
  author          = {Ying Chen and Siwei Qiang and Mingming Ha and Xiaolei
                     Liu and Shaoshuai Li and Jiabi Tong and Lingfeng Yuan
                     and Xiaobo Guo and Zhenfeng Zhu},
  title           = {Semi-Supervised Heterogeneous Graph Learning with
                     Multi-Level Data Augmentation},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {44:1--44:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3608953},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3608953},
  abstract        = {In recent years, semi-supervised graph learning with
                     data augmentation (DA) has been the most commonly used
                     and best-performing method to improve model robustness
                     in sparse scenarios with few labeled samples. However,
                     most existing DA methods are based on \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {44},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2024:RET,
  author =       "Huan Wang and Ruigang Liu and Chuanqi Shi and Junyang
                 Chen and Lei Fang and Shun Liu and Zhiguo Gong",
  title =        "Resisting the Edge-Type Disturbance for Link
                 Prediction in Heterogeneous Networks",
  journal =      j-TKDD,
  volume =       "18",
  number =       "2",
  pages =        "45:1--45:??",
  month =        feb,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3614099",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:46 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3614099",
  abstract =     "The rapid development of heterogeneous networks has
                 proposed new challenges to the long-standing link
                 prediction problem. Existing models trained on the
                 verified edge samples from different types usually
                 learn type-specific knowledge, and their
                 type-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "45",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Li:2024:ASG,
  author          = {Xiaoting Li and Lingwei Chen and Dinghao Wu},
  title           = {Adversary for Social Good: Leveraging Adversarial
                     Attacks to Protect Personal Attribute Privacy},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {46:1--46:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3614098},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3614098},
  abstract        = {Social media has drastically reshaped the world that
                     allows billions of people to engage in such interactive
                     environments to conveniently create and share content
                     with the public. Among them, text data (e.g., tweets,
                     blogs) maintains the basic yet \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {46},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Yan:2024:GMC,
  author          = {Bo Yan and Cheng Yang and Chuan Shi and Yong Fang and
                     Qi Li and Yanfang Ye and Junping Du},
  title           = {Graph Mining for Cybersecurity: a Survey},
  journal         = j-TKDD,
  volume          = {18},
  number          = {2},
  pages           = {47:1--47:??},
  month           = feb,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3610228},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:46 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3610228},
  abstract        = {The explosive growth of cyber attacks today, such as
                     malware, spam, and intrusions, has caused severe
                     consequences on society. Securing cyberspace has become
                     a great concern for organizations and governments.
                     Traditional machine learning based methods are
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {47},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Huang:2024:MII,
  author          = {Qiang Huang and Jing Ma and Jundong Li and Ruocheng
                     Guo and Huiyan Sun and Yi Chang},
  title           = {Modeling Interference for Individual Treatment Effect
                     Estimation from Networked Observational Data},
  journal         = j-TKDD,
  volume          = {18},
  number          = {3},
  pages           = {48:1--48:??},
  month           = apr,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3628449},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:47 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3628449},
  abstract        = {Estimating individual treatment effect (ITE) from
                     observational data has attracted great interest in
                     recent years, which plays a crucial role in
                     decision-making across many high-impact domains such as
                     economics, medicine, and e-commerce. Most existing
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {48},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Gonzalez-Zelaya:2024:FPD,
  author          = {Vladimiro Gonz{\'a}lez-Zelaya and Juli{\'a}n Salas and
                     David Meg{\'\i}as and Paolo Missier},
  title           = {Fair and Private Data Preprocessing through
                     Microaggregation},
  journal         = j-TKDD,
  volume          = {18},
  number          = {3},
  pages           = {49:1--49:??},
  month           = apr,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3617377},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:47 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3617377},
  abstract        = {Privacy protection for personal data and fairness in
                     automated decisions are fundamental requirements for
                     responsible Machine Learning. Both may be enforced
                     through data preprocessing and share a common target:
                     data should remain useful for a task, while \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {49},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Cheng:2024:AFS,
  author          = {Ling Cheng and Feida Zhu and Yong Wang and Ruicheng
                     Liang and Huiwen Liu},
  title           = {From Asset Flow to Status, Action, and Intention
                     Discovery: Early Malice Detection in Cryptocurrency},
  journal         = j-TKDD,
  volume          = {18},
  number          = {3},
  pages           = {50:1--50:??},
  month           = apr,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3626102},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:47 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3626102},
  abstract        = {Cryptocurrency has been subject to illicit activities
                     probably more often than traditional financial assets
                     due to the pseudo-anonymous nature of its transacting
                     entities. An ideal detection model is expected to
                     achieve all three critical properties of \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {50},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Huang:2024:EEF,
  author          = {Yimin Huang and Wanwan Wang and Xingying Zhao and
                     Yukun Wang and Xinyu Feng and Hao He and Ming Yao},
  title           = {{EFMVFL}: an Efficient and Flexible Multi-party
                     Vertical Federated Learning without a Third Party},
  journal         = j-TKDD,
  volume          = {18},
  number          = {3},
  pages           = {51:1--51:??},
  month           = apr,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3627993},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:47 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3627993},
  abstract        = {Federated learning (FL) is a machine learning setting
                     which allows multiple participants collaboratively to
                     train a model under the orchestration of a server
                     without disclosing their local data. Vertical federated
                     learning (VFL) is a special structure in \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {51},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Pellegrina:2024:SEB,
  author =       "Leonardo Pellegrina and Fabio Vandin",
  title =        "{SILVAN}: Estimating Betweenness Centralities with
                 Progressive Sampling and Non-uniform {Rademacher}
                 Bounds",
  journal =      j-TKDD,
  volume =       "18",
  number =       "3",
  pages =        "52:1--52:??",
  month =        apr,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3628601",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:47 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3628601",
  abstract =     "``Sim Sala Bim!'' ---Silvan. Betweenness centrality is a
                 popular centrality measure with applications in several
                 domains and whose exact computation is impractical for
                 modern-sized networks. We present SILVAN, \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "52",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Xia:2024:HEU,
  author          = {Tong Xia and Yong Li and Yunhan Qi and Jie Feng and
                     Fengli Xu and Funing Sun and Diansheng Guo and Depeng
                     Jin},
  title           = {History-enhanced and Uncertainty-aware Trajectory
                     Recovery via Attentive Neural Network},
  journal         = j-TKDD,
  volume          = {18},
  number          = {3},
  pages           = {53:1--53:??},
  month           = apr,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3615660},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:47 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3615660},
  abstract        = {A considerable amount of mobility data has been
                     accumulated due to the proliferation of location-based
                     services. Nevertheless, compared with mobility data
                     from transportation systems like the GPS module in
                     taxis, this kind of data is commonly sparse in
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {53},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Ao:2024:PYV,
  author          = {Xiang Ao and Ling Luo and Xiting Wang and Zhao Yang
                     and Jiun-Hung Chen and Ying Qiao and Qing He and Xing
                     Xie},
  title           = {Put Your Voice on Stage: Personalized Headline
                     Generation for News Articles},
  journal         = j-TKDD,
  volume          = {18},
  number          = {3},
  pages           = {54:1--54:??},
  month           = apr,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3629168},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:47 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3629168},
  abstract        = {In this article, we study the problem of personalized
                     news headline generation, which aims to produce not
                     only concise and fact-consistent titles for news
                     articles but also decorate these titles as personalized
                     irresistible reading invitations by \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {54},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Chen:2024:GAG,
  author          = {Ling Chen and Jiahui Xu and Binqing Wu and Jianlong
                     Huang},
  title           = {Group-Aware Graph Neural Network for Nationwide City
                     Air Quality Forecasting},
  journal         = j-TKDD,
  volume          = {18},
  number          = {3},
  pages           = {55:1--55:??},
  month           = apr,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3631713},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:47 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3631713},
  abstract        = {The problem of air pollution threatens public health.
                     Air quality forecasting can provide the air quality
                     index hours or even days later, which can help the
                     public to prevent air pollution in advance. Previous
                     works focus on citywide air quality \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {55},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Liang:2024:LEI,
  author =       "Yunji Liang and Lei Liu and Luwen Huangfu and Sagar
                 Samtani and Zhiwen Yu and Daniel D. Zeng",
  title =        "Learning Entangled Interactions of Complex Causality
                 via Self-Paced Contrastive Learning",
  journal =      j-TKDD,
  volume =       "18",
  number =       "3",
  pages =        "56:1--56:??",
  month =        apr,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3632406",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:47 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3632406",
  abstract =     "Learning causality from large-scale text corpora is an
                 important task with numerous applications---for
                 example, in finance, biology, medicine, and scientific
                 discovery. Prior studies have focused mainly on simple
                 causality, which only includes one cause-\ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "56",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{Hsu:2024:AAC,
  author          = {Chi-Wei Hsu and Chiao-Ting Chen and Szu-Hao Huang},
  title           = {Adaptive Adversarial Contrastive Learning for
                     Cross-Domain Recommendation},
  journal         = j-TKDD,
  volume          = {18},
  number          = {3},
  pages           = {57:1--57:??},
  month           = apr,
  year            = {2024},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3630259},
  ISSN            = {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L          = {1556-4681},
  bibdate         = {Mon Jan 15 11:01:47 MST 2024},
  bibsource       = {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL             = {https://dl.acm.org/doi/10.1145/3630259},
  abstract        = {Graph-based cross-domain recommendations (CDRs) are
                     useful for suggesting appropriate items because of
                     their promising ability to extract features from
                     user-item interactions and transfer knowledge across
                     domains. Thus, the model can effectively alleviate
                     \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal        = {ACM Trans. Knowl. Discov. Data},
  articleno       = {57},
  fjournal        = {ACM Transactions on Knowledge Discovery from Data
                     (TKDD)},
  journal-URL     = {https://dl.acm.org/loi/tkdd},
}

@Article{Ou:2024:SBO,
  author =       {Weitong Ou and Bo Chen and Xinyi Dai and Weinan Zhang
                 and Weiwen Liu and Ruiming Tang and Yong Yu},
  title =        {A Survey on Bid Optimization in Real-Time Bidding
                 Display Advertising},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {58:1--58:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3628603},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3628603},
  abstract =     {Real-Time Bidding (RTB) is one of the most important
                 forms of online advertising, where an auction is hosted
                 in real time to sell the individual ad impression. How
                 to design an automated bidding strategy in response to
                 the dynamic auction environment is \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {58},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Ni:2024:LOS,
  author =       {Li Ni and Hefei Xu and Yiwen Zhang and Wenjian Luo and
                 Yingying Huang and Victor S. Sheng},
  title =        {Local Overlapping Spatial-aware Community Detection},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {59:1--59:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3634707},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3634707},
  abstract =     {Local spatial-aware community detection refers to
                 detecting a spatial-aware community for a given node
                 using local information. A spatial-aware community
                 means that nodes in the community are tightly connected
                 in structure, and their locations are close \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {59},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Cai:2024:GDA,
  author =       {Ruichu Cai and Fengzhu Wu and Zijian Li and Pengfei
                 Wei and Lingling Yi and Kun Zhang},
  title =        {Graph Domain Adaptation: a Generative View},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {60:1--60:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3631712},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3631712},
  abstract =     {Recent years have witnessed tremendous interest in
                 deep learning on graph-structured data. Due to the high
                 cost of collecting labeled graph-structured data,
                 domain adaptation is important to supervised graph
                 learning tasks with limited samples. However,
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {60},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Su:2024:CBF,
  author =       {Cong Su and Guoxian Yu and Yongqing Zheng and Jun Wang
                 and Zhengtian Wu and Xiangliang Zhang and Carlotta
                 Domeniconi},
  title =        {Causality-Based Fair Multiple Decision by Response
                 Functions},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {61:1--61:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3632529},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3632529},
  abstract =     {A recent trend of fair machine learning is to build a
                 decision model subjected to causality-based fairness
                 requirements, which concern with the causality between
                 sensitive attributes and decisions. Almost all (if not
                 all) solutions focus on a single fair \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {61},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Han:2024:LUB,
  author =       {Di Han and Yifan Huang and Junmin Liu and Kai Liao and
                 Kunling Lin},
  title =        {{LSAB}: User Behavioral Pattern Modeling in Sequential
                 Recommendation by Learning Self-Attention Bias},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {62:1--62:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3632625},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3632625},
  abstract =     {Since the weight of a self-attention model is not
                 affected by the sequence interval, it can more
                 accurately and completely describe the user interests,
                 so it is widely used in processing sequential
                 recommendation. However, the mainstream self-attention
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {62},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Huang:2024:LCP,
  author =       {Shenyang Huang and Samy Coulombe and Yasmeen Hitti and
                 Reihaneh Rabbany and Guillaume Rabusseau},
  title =        {{Laplacian} Change Point Detection for Single and
                 Multi-view Dynamic Graphs},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {63:1--63:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3631609},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3631609},
  abstract =     {Dynamic graphs are rich data structures that are used
                 to model complex relationships between entities over
                 time. In particular, anomaly detection in temporal
                 graphs is crucial for many real-world applications such
                 as intrusion identification in network \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {63},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Saha:2024:PPN,
  author =       {Swapnil Saha and Hafiz Imtiaz},
  title =        {Privacy-Preserving Non-Negative Matrix Factorization
                 with Outliers},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {64:1--64:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3632961},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3632961},
  abstract =     {Non-negative matrix factorization is a popular
                 unsupervised machine learning algorithm for extracting
                 meaningful features from inherently non-negative data.
                 Such data often contain privacy-sensitive user
                 information. Additionally, the dataset can contain
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {64},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Yang:2024:SGI,
  author =       {Ming-Chuan Yang and Guo-Wei Wong and Meng Chang Chen},
  title =        {Sparse Grid Imputation Using Unpaired Imprecise
                 Auxiliary Data: Theory and Application to {PM2.5}
                 Estimation},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {65:1--65:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3634751},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3634751},
  abstract =     {Sparse grid imputation (SGI) is a challenging problem,
                 as its goal is to infer the values of the entire grid
                 from a limited number of cells with values.
                 Traditionally, the problem is solved using regression
                 methods such as KNN and kriging, whereas in the
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {65},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Zhao:2024:PAD,
  author =       {Han Zhao and Xu Yang and Cheng Deng},
  title =        {Parameter-Agnostic Deep Graph Clustering},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {66:1--66:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3633783},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3633783},
  abstract =     {Deep graph clustering, efficiently dividing nodes into
                 multiple disjoint clusters in an unsupervised manner,
                 has become a crucial tool for analyzing ubiquitous
                 graph data. Existing methods have acquired impressive
                 clustering effects by optimizing the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {66},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Wang:2024:LHT,
  author =       {Song Wang and Yushun Dong and Xiao Huang and Chen Chen
                 and Jundong Li},
  title =        {Learning Hierarchical Task Structures for Few-shot
                 Graph Classification},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {67:1--67:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3635473},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3635473},
  abstract =     {The problem of few-shot graph classification targets
                 at assigning class labels for graph samples, where only
                 limited labeled graphs are provided for each class. To
                 solve the problem brought by label scarcity, recent
                 studies have proposed to adopt the \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {67},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Rong:2024:TST,
  author =       "Huan Rong and Xin Yu and Tinghuai Ma and Victor S.
                 Sheng and Yang Zhou and Mznah Al-Rodhaan",
  title =        "Three-stage Transferable and Generative Crowdsourced
                 Comment Integration Framework Based on Zero- and
                 Few-shot Learning with Domain Distribution Alignment",
  journal =      j-TKDD,
  volume =       "18",
  number =       "3",
  pages =        "68:1--68:??",
  month =        apr,
  year =         "2024",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3636511",
  ISSN =         "1556-4681 (print), 1556-472X (electronic)",
  ISSN-L =       "1556-4681",
  bibdate =      "Mon Jan 15 11:01:47 MST 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tkdd.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3636511",
  abstract =     "Online shopping has become a crucial way to encourage
                 daily consumption, where the User-generated, or
                 crowdsourced product comments, can offer a broad range
                 of feedback on e-commerce products. As a result,
                 integrating critical opinions or major attitudes
                 \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Knowl. Discov. Data",
  articleno =    "68",
  fjournal =     "ACM Transactions on Knowledge Discovery from Data
                 (TKDD)",
  journal-URL =  "https://dl.acm.org/loi/tkdd",
}

@Article{DiPalma:2024:EVS,
  author =       {Luciano {Di Palma} and Yanlei Diao and Anna Liu},
  title =        {Efficient Version Space Algorithms for
                 Human-in-the-loop Model Development},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {69:1--69:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3637443},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3637443},
  abstract =     {When active learning (AL) is applied to help users
                 develop a model on a large dataset through
                 interactively presenting data instances for labeling,
                 existing AL techniques often suffer from two main
                 drawbacks: First, to reach high accuracy they may
                 require \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {69},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Tipirneni:2024:SSA,
  author =       {Sindhu Tipirneni and Ming Zhu and Chandan K. Reddy},
  title =        {{StructCoder}: Structure-Aware Transformer for Code
                 Generation},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {70:1--70:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3636430},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3636430},
  abstract =     {There has been a recent surge of interest in
                 automating software engineering tasks using deep
                 learning. This article addresses the problem of code
                 generation, in which the goal is to generate target
                 code given source code in a different language or a
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {70},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Fan:2024:DDE,
  author =       {Wei Fan and Yanjie Fu and Shun Zheng and Jiang Bian
                 and Yuanchun Zhou and Hui Xiong},
  title =        {{DEWP}: Deep Expansion Learning for Wind Power
                 Forecasting},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {71:1--71:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3637552},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3637552},
  abstract =     {Wind is one kind of high-efficient,
                 environmentally-friendly, and cost-effective energy
                 source. Wind power, as one of the largest renewable
                 energy in the world, has been playing a more and more
                 important role in supplying electricity. Though growing
                 \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {71},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}

@Article{Liu:2024:EFC,
  author =       {Zhe Liu and Sukumar Letchmunan},
  title =        {Enhanced Fuzzy Clustering for Incomplete Instance with
                 Evidence Combination},
  journal =      j-TKDD,
  volume =       {18},
  number =       {3},
  pages =        {72:1--72:??},
  month =        apr,
  year =         {2024},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3638061},
  ISSN =         {1556-4681 (print), 1556-472X (electronic)},
  ISSN-L =       {1556-4681},
  bibdate =      {Mon Jan 15 11:01:47 MST 2024},
  bibsource =    {https://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3638061},
  abstract =     {Clustering incomplete instance is still a challenging
                 task since missing values maybe make the cluster
                 information ambiguous, leading to the uncertainty and
                 imprecision in results. This article investigates an
                 enhanced fuzzy clustering with evidence \ldots{}},
  acknowledgement = ack-nhfb,
  ajournal =     {ACM Trans. Knowl. Discov. Data},
  articleno =    {72},
  fjournal =     {ACM Transactions on Knowledge Discovery from Data
                 (TKDD)},
  journal-URL =  {https://dl.acm.org/loi/tkdd},
}